diff --git a/.github/workflows/jekyll.yml b/.github/workflows/jekyll.yml index b7afa5c..0b8ff47 100644 --- a/.github/workflows/jekyll.yml +++ b/.github/workflows/jekyll.yml @@ -9,8 +9,12 @@ name: Deploy Jekyll site to Pages on: # Runs on pushes targeting the default branch push: - branches: ["main"] - + branches: + - main + - w25 + - w24 + - w23 + # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -32,23 +36,23 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Ruby - uses: ruby/setup-ruby@55283cc23133118229fd3f97f9336ee23a179fcf # v1.146.0 + uses: ruby/setup-ruby@v1 with: ruby-version: '3.1' # Not needed with a .ruby-version file bundler-cache: true # runs 'bundle install' and caches installed gems automatically cache-version: 0 # Increment this number if you need to re-download cached gems - name: Setup Pages id: pages - uses: actions/configure-pages@v3 + uses: actions/configure-pages@v4 - name: Build with Jekyll # Outputs to the './_site' directory by default run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}" env: JEKYLL_ENV: production - name: Checkout W23 - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: w23 clean: false @@ -58,7 +62,7 @@ jobs: env: JEKYLL_ENV: production - name: Checkout W24 - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: ref: w24 clean: false @@ -67,9 +71,19 @@ jobs: run: bundle exec jekyll build env: JEKYLL_ENV: production + - name: Checkout W25 + uses: actions/checkout@v4 + with: + ref: w25 + clean: false + - name: Build W25 with Jekyll + # Outputs to the './_site/w25' directory by default + run: bundle exec jekyll build + env: + JEKYLL_ENV: production - name: Upload artifact # Automatically uploads an artifact from the './_site' directory by default - uses: actions/upload-pages-artifact@v2 + uses: actions/upload-pages-artifact@v3 # Deployment job deploy: @@ -81,4 +95,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v2 + uses: actions/deploy-pages@v4 diff --git a/_config.yml b/_config.yml index d9111ce..7a6e008 100644 --- a/_config.yml +++ b/_config.yml @@ -13,20 +13,21 @@ # you will see them accessed via {{ site.title }}, {{ site.github_repo }}, and so on. # You can create any custom variable you would like, and they will be accessible # in the templates via {{ site.myvariable }}. -title: DeepRob +title: "DeepRob: Deep Learning for Robot Perception" tagline: Deep Learning for Robot Perception description: This course covers the necessary background of neural-network-based deep learning for robot perception – building on advancements in computer vision that enable robots to physically manipulate objects author: Anthony Opipari -baseurl: '' # the subpath of your site, e.g. /blog +baseurl: '/w25' # the subpath of your site, e.g. /blog url: 'https://deeprob.org' # the base hostname & protocol for your site, e.g. 
http://example.com exclude: ["Gemfile", "Gemfile.lock", "LICENSE"] logo: "/assets/logos/favicons/UMich_favicon_light.png" +destination: _site/w25/ ga_tracking: G-FX9SVF3WSQ ga_tracking_anonymize_ip: true # Use GDPR compliant Google Analytics settings (true by default) # Theme settings -remote_theme: just-the-docs/just-the-docs@v0.4.0.rc2 +remote_theme: just-the-docs/just-the-docs@v0.7.0 color_scheme: mich search_enabled: true heading_anchors: true @@ -34,14 +35,14 @@ permalink: pretty aux_links_new_tab: true aux_links: Forum: - - 'https://edstem.org/us/courses/31008/discussion/' - Office Hours: - - 'https://eecsoh.eecs.umich.edu/queues/2HK9nZeBgJBEPF6k84hbys3PNoK' + - https://piazza.com/class/m4pgejar4ua2qf + OH Queue: + - https://oh.eecs.umich.edu/course_queues/884 Autograder: - - 'https://autograder.io/web/course/199' - Gradescope: - - 'https://www.gradescope.com/courses/480760' - 'Link to Michigan Robotics website': + - https://autograder.io/web/course/299 + Google Drive: + - https://drive.google.com/drive/folders/1vOz1SA_fb1ebIe0JIaFMHA3AdwKkh7sG?usp=drive_link + 'Link to Michigan Robotics website': - 'https://robotics.umich.edu/' @@ -58,7 +59,7 @@ defaults: type: staffers values: layout: staffer - subpath: '/assets/images/' + subpath: '/assets/' - scope: path: '' type: modules @@ -77,7 +78,10 @@ defaults: callouts: highlight: - color: red + color: yellow + note: + title: Note + color: purple compress_html: clippings: all diff --git a/_includes/components/sidebar.html b/_includes/components/sidebar.html index 21c1d88..c6e23f4 100644 --- a/_includes/components/sidebar.html +++ b/_includes/components/sidebar.html @@ -12,7 +12,7 @@ | where_exp:"item", "item.nav_exclude != true" | size %} {% if pages_top_size > 0 %} - {% include nav.html pages=site.html_pages key=nil %} + {% include components/nav.html pages=site.html_pages key=nil %} {% endif %} {%- if site.nav_external_links -%} {% else %} - {% include nav.html pages=collection key=collection_key %} + {% include components/nav.html pages=collection key=collection_key %} {% endif %} {% else %} - {% include nav.html pages=collection key=collection_key %} + {% include components/nav.html pages=collection key=collection_key %} {% endif %} {% endif %} {% endfor %} diff --git a/_includes/footer_custom.html b/_includes/footer_custom.html index 74642ad..1feeb8b 100644 --- a/_includes/footer_custom.html +++ b/_includes/footer_custom.html @@ -19,11 +19,11 @@ --> - +
diff --git a/_includes/head_custom.html b/_includes/head_custom.html index cb8f697..fff8001 100644 --- a/_includes/head_custom.html +++ b/_includes/head_custom.html @@ -1,4 +1,4 @@ - + diff --git a/_includes/title.html b/_includes/title.html index d305df6..b3cefad 100644 --- a/_includes/title.html +++ b/_includes/title.html @@ -1,5 +1,9 @@ {% if site.logo %} - {{ site.title }} +
+ + +
{% else %} {{ site.title }} {% endif %} \ No newline at end of file diff --git a/_layouts/minimal.html b/_layouts/minimal.html index f5c7e59..c626f56 100644 --- a/_layouts/minimal.html +++ b/_layouts/minimal.html @@ -25,7 +25,7 @@ | where_exp:"item", "item.nav_exclude != true" | size %} {% if pages_top_size > 0 %} - {% include nav.html pages=site.html_pages key=nil %} + {% include components/nav.html pages=site.html_pages key=nil %} {% endif %} {% if site.just_the_docs.collections %} {% assign collections_size = site.just_the_docs.collections | size %} @@ -34,7 +34,7 @@ {% assign collection_value = collection_entry[1] %} {% assign collection = site[collection_key] %} {% if collection_value.nav_exclude != true %} - {% include nav.html pages=collection key=collection_key %} + {% include components/nav.html pages=collection key=collection_key %} {% endif %} {% endfor %} {% endif %} diff --git a/_layouts/project.html b/_layouts/project.html index 855fa7e..e0ec699 100644 --- a/_layouts/project.html +++ b/_layouts/project.html @@ -27,8 +27,22 @@

{{ page.title }}

{% else %} {{author.name}} {% endif %} -
- {{author.affiliation}} + {% if author.affiliation %} +
+ {{author.affiliation}} + {% endif %} + {% if author.year %} +
+ {{author.year}} + {% endif %} + {% if author.hometown %} +
+ {{author.hometown}} + {% endif %} + {% if author.email %} +
+ {{author.email}} + {% endif %}
{% endfor %} diff --git a/_layouts/staffer.html b/_layouts/staffer.html index 6978d97..366bbe7 100644 --- a/_layouts/staffer.html +++ b/_layouts/staffer.html @@ -3,7 +3,7 @@ {% endif %}
-

+

{% if page.website %} {{ page.name }} {% else %} @@ -12,18 +12,18 @@

{% if page.pronouns %} {{ page.pronouns }} {% endif %} -

+ {% if page.email %}

{{ page.email }}

{% endif %} - {% if page.role == "Instructor" or page.role == "Research Associate" or page.role == "Instructional Aide" %} + {% if page.role == "Instructor" or page.role == "Graduate Student Instructor" or page.role == "Instructional Assistant" %} {% if page.section %}

Quiz Section: {{ page.section | markdownify | strip_html }}

{% endif %} {% if page.office-hours %}

- Office Hours: {{ page.office-hours | markdownify | strip_html }} + Office Hours: {{ page.office-hours | markdownify | strip_html }}

{% endif %} {% elsif page.role == "Collaborating Instructor" %} diff --git a/_modules/week-01.md b/_modules/week-01.md index be0e3a1..5e3b1a6 100644 --- a/_modules/week-01.md +++ b/_modules/week-01.md @@ -2,10 +2,12 @@ title: Week 1 --- -Jan 11 -: [**Lec 1**](#lec-1){: #lec-1 .label .label-purple } Course Introduction - :   -: [**Project 0 out**{: .label .label-yellow }](/syllabus/#programming-projects){: .project-link} - : [πŸ“– GBC Ch. 1](https://www.deeplearningbook.org/contents/intro.html){: target="_blank" rel="noopener noreferrer"} + +Jan 8 +: [**Lec 1**]({{ site.baseurl }}/assets/slides/deeprob_lecture_01_introduction.pdf){: #lec-1 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Course Introduction**]({{ site.baseurl }}/assets/slides/deeprob_lecture_01_introduction.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/fwGqzo){: target="_blank" rel="noopener noreferrer"} +: [**Project 0 out**{: .label .label-yellow }]({{ site.baseurl }}/projects/project0/){: .project-link} + + diff --git a/_modules/week-02.md b/_modules/week-02.md index c98a6a8..923190e 100644 --- a/_modules/week-02.md +++ b/_modules/week-02.md @@ -2,21 +2,30 @@ title: Week 2 --- -Jan 16 -: [**Lec 2**](#lec-2){: #lec-2 .label .label-purple } Image Classification - :   +Jan 13 +: [**Lec 2**]({{ site.baseurl }}/assets/slides/deeprob_lecture_02_image_classification.pdf){: #lec-2 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Image Classification**]({{ site.baseurl }}/assets/slides/deeprob_lecture_02_image_classification.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/k4WywR){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– GBC Ch. 
1](https://www.deeplearningbook.org/contents/intro.html){: target="_blank" rel="noopener noreferrer"} :   : [πŸ“– 231n Image Classification](https://cs231n.github.io/classification/){: target="_blank" rel="noopener noreferrer"} -Jan 17 -: [**Dis 1**](#dis-1){: #dis-1 .label .label-blue } Intro to Python, Pytorch and Colab - :   +Jan 14 +: [**Dis 0**]({{ site.baseurl }}/assets/slides/deeprob_discussion_00.pdf){: #dis-0 .label .label-blue target="_blank" rel="noopener noreferrer" } [**Intro to Python and Pytorch**]({{ site.baseurl }}/assets/slides/deeprob_discussion_00.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/1XpAsw){: target="_blank" rel="noopener noreferrer"} +:   + -Jan 18 -: [**Lec 3**](#lec-3){: #lec-3 .label .label-purple } Linear Classifiers - :   -: [**Project 0 due**{: .label .label-red }](/syllabus/#programming-projects){: .project-link} [**Project 1 out**{: .label .label-yellow }](/syllabus/#programming-projects){: .project-link} +Jan 15 +: [**Lec 3**]({{ site.baseurl }}/assets/slides/deeprob_lecture_03_linear_classifiers.pdf){: #lec-3 .label .label-purple target="_blank" rel="noopener noreferrer"} [**Linear Classifiers**]({{ site.baseurl }}/assets/slides/deeprob_lecture_03_linear_classifiers.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/h7xdMy){: target="_blank" rel="noopener noreferrer"} +: [**Project 1 out**{: .label .label-yellow }]({{ site.baseurl }}/projects/project1/){: .project-link} : [πŸ“– 231n Linear Classification](https://cs231n.github.io/linear-classify/){: target="_blank" rel="noopener noreferrer"} + + +Jan 19 +: [**Project 0 due**{: .label .label-red }]({{ site.baseurl }}/projects/project0/){: .project-link} +:   \ No newline at end of file diff --git a/_modules/week-03.md b/_modules/week-03.md index 5143e8a..9189bd3 100644 --- a/_modules/week-03.md +++ b/_modules/week-03.md @@ -2,38 +2,31 @@ title: Week 3 --- -Jan 23 -: [**Lec 4**](#lec-4){: #lec-4 target="_blank" rel="noopener noreferrer" .label .label-purple } Regularization + Optimization - :   -: [**Quiz 1**{: .label .label-orange }](#lec-4){: .project-link target="_blank" rel="noopener noreferrer"} - : [πŸ“– 231n Optimization](https://cs231n.github.io/optimization-1/){: target="_blank" rel="noopener noreferrer"} -:   - : [πŸ“– GBC Sec 8.1-8.6](https://www.deeplearningbook.org/contents/optimization.html){: target="_blank" rel="noopener noreferrer"} +Jan 20 +: [**MLK Day-No Class**](https://oami.umich.edu/mlk-symposium/){: target="_blank" rel="noopener noreferrer" } :   - : [πŸ“– Li et al., Visualizing Loss Landscaps](https://arxiv.org/abs/1712.09913){: target="_blank" rel="noopener noreferrer"} -:   - : [πŸ“– Goh, Why Momentum Really Works](https://distill.pub/2017/momentum/){: target="_blank" rel="noopener noreferrer"} - -Jan 24 -: [**Dis 2**](#dis-2){: #dis-2 target="_blank" rel="noopener noreferrer" .label .label-blue } Intro to PROPS Dataset - :   +Jan 21 +: [**Dis 1**]({{ site.baseurl }}/assets/slides/deeprob_discussion_01.pdf){: #dis-1 target="_blank" rel="noopener noreferrer" .label .label-blue } [**Datasets + P1 Primer**]({{ site.baseurl }}/assets/slides/deeprob_discussion_01.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } 
**Recording**](https://leccap.engin.umich.edu/leccap/player/r/dM3dZY){: target="_blank" rel="noopener noreferrer"} :   : [πŸ“– Chen et al., ProgressLabeller](https://arxiv.org/abs/2203.00283){: target="_blank" rel="noopener noreferrer"} -Jan 25 -: [**Lec 5**](#lec-5){: #lec-5 target="_blank" rel="noopener noreferrer" .label .label-purple } Neural Networks - :   +Jan 22 +: [**Lec 4**]({{ site.baseurl }}/assets/slides/deeprob_lecture_04_optimization.pdf){: #lec-4 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Regularization + Optimization**]({{ site.baseurl }}/assets/slides/deeprob_lecture_04_optimization.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/PF88qo){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– 231n Optimization](https://cs231n.github.io/optimization-1/){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– 231n Neural Networks](https://cs231n.github.io/neural-networks-1/){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– GBC Sec 8.1-8.6](https://www.deeplearningbook.org/contents/optimization.html){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– GBC Sec 6.1-6.4](https://www.deeplearningbook.org/contents/mlp.html){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– Li et al., Visualizing Loss Landscaps](https://arxiv.org/abs/1712.09913){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– Nielsen, Universal Approximation](http://neuralnetworksanddeeplearning.com/chap4.html){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– Goh, Why Momentum Really Works](https://distill.pub/2017/momentum/){: target="_blank" rel="noopener noreferrer"} diff --git a/_modules/week-04.md b/_modules/week-04.md index 4444da2..fa0b2e7 100644 --- a/_modules/week-04.md +++ b/_modules/week-04.md @@ -2,48 +2,38 @@ title: Week 4 --- -Jan 30 -: [**Lec 6**](#lec-6){: #lec-6 target="_blank" rel="noopener noreferrer" .label .label-purple } Backpropagation - :   -: [**Quiz 2**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer" } - : [πŸ“– cs231n Backpropagation](https://cs231n.github.io/optimization-2/){: target="_blank" rel="noopener noreferrer"} -:   - : [πŸ“– GBC Sec 6.5-6.6](https://www.deeplearningbook.org/contents/mlp.html#pf25){: target="_blank" rel="noopener noreferrer"} -:   - : [πŸ“– Olah, Backpropagation](http://colah.github.io/posts/2015-08-Backprop/){: target="_blank" rel="noopener noreferrer"} -Jan 31 -: [**Dis 3**](#dis-3){: #lab-3 target="_blank" rel="noopener noreferrer" .label .label-blue } How to Read Research Papers - :   -:   - : [πŸ“– Krizhevsky et al., AlexNet](https://papers.nips.cc/paper/2012/hash/c399862d3b9d6b76c8436e924a68c45b-Abstract.html){: target="_blank" rel="noopener noreferrer"} +Jan 27 +: [**Lec 5**]({{ site.baseurl }}/assets/slides/deeprob_lecture_05_neural_networks.pdf){: #lec-5 target="_blank" rel="noopener noreferrer" .label .label-purple } [**Neural Networks I**]({{ site.baseurl }}/assets/slides/deeprob_lecture_05_neural_networks.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/4SXQ4D){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– Xiang et al., PoseCNN](https://arxiv.org/abs/1711.00199){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– 231n 
Neural Networks](https://cs231n.github.io/neural-networks-1/){: target="_blank" rel="noopener noreferrer" } :   - : [πŸ“– Mildenhall et al., NeRF](https://arxiv.org/abs/2003.08934){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– GBC Sec 6.1-6.4](https://www.deeplearningbook.org/contents/mlp.html){: target="_blank" rel="noopener noreferrer"} -Feb 1 -: [**Lec 7**](#lec-7){: #lec-7 target="_blank" rel="noopener noreferrer" .label .label-purple } Convolutional Neural Networks - :   -: [**Quiz 3**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer" } [**Project 1 due**{: .label .label-red }](/projects/project1/){: .project-link} [**Project 2 out**{: .label .label-yellow }](/projects/project2/){: .project-link} - : [πŸ“– cs231n ConvNets](https://cs231n.github.io/convolutional-networks/){: target="_blank" rel="noopener noreferrer"} -:   - : [πŸ“– GBC Ch. 9](https://www.deeplearningbook.org/contents/convnets.html){: target="_blank" rel="noopener noreferrer"} -:   - : [πŸ“– Araujo et al., Receptive Fields of CNNs](https://distill.pub/2019/computing-receptive-fields/){: target="_blank" rel="noopener noreferrer"} +Jan 28 +: [**Dis 2**]({{ site.baseurl }}/assets/slides/deeprob_discussion_02.pdf){: #dis-2 target="_blank" rel="noopener noreferrer" .label .label-blue } [**Neural Networks II**]({{ site.baseurl }}/assets/slides/deeprob_discussion_02.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/n0RJiS){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– Olah et al., Feature Visualization](https://distill.pub/2017/feature-visualization/){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– Nielsen, Universal Approximation](http://neuralnetworksanddeeplearning.com/chap4.html){: target="_blank" rel="noopener noreferrer"} +Jan 29 +: [**Lec 6**]({{ site.baseurl }}/assets/slides/deeprob_lecture_06_backpropagation.pdf){: #lec-6 target="_blank" rel="noopener noreferrer" .label .label-purple } [**Backpropagation**]({{ site.baseurl }}/assets/slides/deeprob_lecture_06_backpropagation.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/06KI1l){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– cs231n Backpropagation](https://cs231n.github.io/optimization-2/){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– GBC Sec 6.5-6.6](https://www.deeplearningbook.org/contents/mlp.html#pf25){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Olah, Backpropagation](http://colah.github.io/posts/2015-08-Backprop/){: target="_blank" rel="noopener noreferrer"} -Jan 27 -: [**Dis 4**](/assets/slides/deeprob_discussion_04.pdf){: #dis-4 target="_blank" rel="noopener noreferrer" .label .label-blue }[Overview of Final Project Topics I](/assets/slides/deeprob_discussion_04.pdf){: target="_blank" rel="noopener noreferrer"} - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/xlydm11pIFg){: target="_blank" rel="noopener noreferrer"} -: [**Paper Selection Survey Out**{: .label .label-yellow }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} - : [πŸ“– Vannevar Bush, Science The Endless Frontier](https://www.nsf.gov/od/lpa/nsf50/vbush1945.htm){: target="_blank" 
rel="noopener noreferrer"} +Feb 2 +: [**Project 1 due**{: .label .label-red }]({{ site.baseurl }}/projects/project1/){: .project-link} [**Project 2 out**]({{ site.baseurl }}/projects/project2/){: .label .label-yellow } +:   diff --git a/_modules/week-05.md b/_modules/week-05.md index 96870a3..8409a10 100644 --- a/_modules/week-05.md +++ b/_modules/week-05.md @@ -2,10 +2,34 @@ title: Week 5 --- -Jan 31 -: [**Lec 8**](/assets/slides/deeprob_08_cnn_architectures.pdf){: #lec-8 .label .label-purple target="_blank" rel="noopener noreferrer"}[CNN Architectures](/assets/slides/deeprob_08_cnn_architectures.pdf){: target="_blank" rel="noopener noreferrer"} - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/-HtdgIPY23g){: target="_blank" rel="noopener noreferrer"} -: [**Quiz 4**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} +Feb 3 +: [**Lec 7**]({{ site.baseurl }}/assets/slides/deeprob_lecture_07_convolutional_networks.pdf){: #lec-7 target="_blank" rel="noopener noreferrer" .label .label-purple } [**Convolutional Neural Networks**]({{ site.baseurl }}/assets/slides/deeprob_lecture_07_convolutional_networks.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/ZXhFuv){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– cs231n ConvNets](https://cs231n.github.io/convolutional-networks/){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– GBC Ch. 9](https://www.deeplearningbook.org/contents/convnets.html){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Araujo et al., Receptive Fields of CNNs](https://distill.pub/2019/computing-receptive-fields/){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Olah et al., Feature Visualization](https://distill.pub/2017/feature-visualization/){: target="_blank" rel="noopener noreferrer"} + + + + +Feb 4 +: [**Dis 3**]({{ site.baseurl }}/assets/slides/deeprob_discussion_03.pdf){: #dis-3 target="_blank" rel="noopener noreferrer" .label .label-blue } [**Local Jupyter Notebook Demo**]({{ site.baseurl }}/assets/slides/deeprob_discussion_03.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://drive.google.com/file/d/1xuipsZGsWB-UtKu514vYfGyrINdW5jGr/view){: target="_blank" rel="noopener noreferrer"} +:   + + + + + +Feb 5 +: [**Lec 8**]({{ site.baseurl }}/assets/slides/deeprob_lecture_08_cnn_architectures.pdf){: #lec-8 target="_blank" rel="noopener noreferrer" .label .label-purple } [**CNN Architectures**]({{ site.baseurl }}/assets/slides/deeprob_lecture_08_cnn_architectures.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/oltvbP){: target="_blank" rel="noopener noreferrer"} +:   : [πŸ“– Krizhevsky et al., AlexNet](https://papers.nips.cc/paper/2012/hash/c399862d3b9d6b76c8436e924a68c45b-Abstract.html){: target="_blank" rel="noopener noreferrer"} :   : [πŸ“– Simonyan and Zisserman, VGG](https://arxiv.org/abs/1409.1556){: target="_blank" rel="noopener noreferrer"} @@ -15,17 +39,6 @@ Jan 31 : [πŸ“– He et al., ResNet](https://arxiv.org/abs/1512.03385){: target="_blank" rel="noopener noreferrer"} -Feb 2 -: [**Lec 
9**](/assets/slides/deeprob_09_training_neural_networks_1.pdf){: #lec-9 .label .label-purple target="_blank" rel="noopener noreferrer"}[Training Neural Networks I](/assets/slides/deeprob_09_training_neural_networks_1.pdf){: target="_blank" rel="noopener noreferrer"} - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/aBobLAvSy5s){: target="_blank" rel="noopener noreferrer"} -:   - : [πŸ“– cs231n Training I](https://cs231n.github.io/neural-networks-2/){: target="_blank" rel="noopener noreferrer"} -:   - : [πŸ“– Olah et al., Equivariance in Networks](https://distill.pub/2020/circuits/equivariance/){: target="_blank" rel="noopener noreferrer"} -Feb 3 -: [**Dis 5**](/assets/slides/deeprob_discussion_05.pdf){: #dis-5 .label .label-blue target="_blank" rel="noopener noreferrer"}[Overview of Final Project Topics II](/assets/slides/deeprob_discussion_05.pdf){: target="_blank" rel="noopener noreferrer"} - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/NlYzo1goLPQ){: target="_blank" rel="noopener noreferrer"} -: [**Paper Selection Survey Due**{: .label .label-red }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} diff --git a/_modules/week-06.md b/_modules/week-06.md index 82855bd..6fcdf38 100644 --- a/_modules/week-06.md +++ b/_modules/week-06.md @@ -2,29 +2,44 @@ title: Week 6 --- -Feb 7 -: [**Lec 10**](/assets/slides/deeprob_10_training_neural_networks_2.pdf){: #lec-10 .label .label-purple target="_blank" rel="noopener noreferrer" }[Training Neural Networks II](/assets/slides/deeprob_10_training_neural_networks_2.pdf){: target="_blank" rel="noopener noreferrer"} - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/B6NK6_4pqgU){: target="_blank" rel="noopener noreferrer"} -: [**Quiz 5**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} [**Final Project out**{: .label .label-yellow }](/projects/finalproject/){: .project-link} - : [πŸ“– cs231n Training II](https://cs231n.github.io/neural-networks-3/){: target="_blank" rel="noopener noreferrer"} + + +Feb 10 +: [**Lec 9**]({{ site.baseurl }}/assets/slides/deeprob_lecture_09_training_neural_networks.pdf){: #lec-9 .label .label-purple target="_blank" rel="noopener noreferrer"} [**Training Neural Networks I**]({{ site.baseurl }}/assets/slides/deeprob_lecture_09_training_neural_networks.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/2PMvaA){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– Goodfellow et al., Adversarial Examples](https://arxiv.org/abs/1412.6572){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– cs231n Training I](https://cs231n.github.io/neural-networks-2/){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Olah et al., Equivariance in Networks](https://distill.pub/2020/circuits/equivariance/){: target="_blank" rel="noopener noreferrer"} -Feb 9 -: [**Lec 11**](/assets/slides/deeprob_11_deep_learning_software.pdf){: #lec-11 .label .label-purple target="_blank" rel="noopener noreferrer" }[Deep Learning Software](/assets/slides/deeprob_11_deep_learning_software.pdf){: target="_blank" rel="noopener noreferrer"} - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/M24mYrH8K40){: target="_blank" rel="noopener 
noreferrer"} -: [**Quiz 6**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} - : [πŸ“– Redmon, Darknet](https://pjreddie.com/darknet/){: target="_blank" rel="noopener noreferrer"} + + + +Feb 11 +: **Dis 4**{: #dis-4 .label .label-blue target="_blank" rel="noopener noreferrer" } **P2 Help** + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/oUpfVH){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– Jia et al., Caffe](https://arxiv.org/abs/1408.5093){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– Krizhevsky et al., AlexNet](https://papers.nips.cc/paper/2012/hash/c399862d3b9d6b76c8436e924a68c45b-Abstract.html){: target="_blank" rel="noopener noreferrer"} + + + + + +Feb 12 +: [**Lec 10**]({{ site.baseurl }}/assets/slides/deeprob_lecture_10_training_neural_networks.pdf){: #lec-10 .label .label-purple target="_blank" rel="noopener noreferrer"} [**Training Neural Networks II**]({{ site.baseurl }}/assets/slides/deeprob_lecture_10_training_neural_networks.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/iQwcUy){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– Abadi et al., TensorFlow](https://arxiv.org/abs/1603.04467){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– cs231n Training II](https://cs231n.github.io/neural-networks-3/){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Goodfellow et al., Adversarial Examples](https://arxiv.org/abs/1412.6572){: target="_blank" rel="noopener noreferrer"} + + + + + +Feb 16 +: [**Project 2 due**]({{ site.baseurl }}/projects/project2/){: .label .label-red } [**Project 3 out**]({{ site.baseurl }}/projects/project3/){: .label .label-yellow } :   - : [πŸ“– Paszke et al., PyTorch](https://arxiv.org/abs/1912.01703){: target="_blank" rel="noopener noreferrer"} -Feb 10 -: [**Dis 6**](/assets/slides/deeprob_discussion_06.pdf){: #dis-6 .label .label-blue target="_blank" rel="noopener noreferrer" }[How to Present Research Papers](/assets/slides/deeprob_discussion_06.pdf){: target="_blank" rel="noopener noreferrer"} - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/DFS8J0I7dbU){: target="_blank" rel="noopener noreferrer"} -: [**Project 2 due**{: .label .label-red }](/projects/project2/){: .project-link} [**Project 3 out**{: .label .label-yellow }](/projects/project3/){: .project-link} diff --git a/_modules/week-07.md b/_modules/week-07.md index 8fd231a..fa0addb 100644 --- a/_modules/week-07.md +++ b/_modules/week-07.md @@ -2,10 +2,11 @@ title: Week 7 --- -Feb 14 -: [**Lec 12**](/assets/slides/deeprob_12_object_detection.pdf){: #lec-12 .label .label-purple target="_blank" rel="noopener noreferrer" }[Object Detection](/assets/slides/deeprob_12_object_detection.pdf){: target="_blank" rel="noopener noreferrer"} - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/VX6tiNEVTnk){: target="_blank" rel="noopener noreferrer"} -: [**Quiz 7**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} + +Feb 17 +: [**Lec 11**]({{ site.baseurl }}/assets/slides/deeprob_lecture_11_object_detection.pdf){: #lec-11 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Object Detection I**]({{ 
site.baseurl }}/assets/slides/deeprob_lecture_11_object_detection.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/jNvaio){: target="_blank" rel="noopener noreferrer"} +:   : [πŸ“– Girshick et al., R-CNN](https://arxiv.org/abs/1311.2524){: target="_blank" rel="noopener noreferrer"} :   : [πŸ“– Redmon et al., YOLO](https://arxiv.org/abs/1506.02640){: target="_blank" rel="noopener noreferrer"} @@ -14,17 +15,27 @@ Feb 14 -Feb 16 -: [**Lec 13**](/assets/slides/deeprob_13_object_detectors_and_segmentation.pdf){: #lec-13 .label .label-purple target="_blank" rel="noopener noreferrer" } [Object Detectors & Segmentation](/assets/slides/deeprob_13_object_detectors_and_segmentation.pdf){: target="_blank" rel="noopener noreferrer"} - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/dpgSUSn8FS0){: target="_blank" rel="noopener noreferrer"} -: [**Quiz 8**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} - : [πŸ“– Girshick, Fast R-CNN](https://arxiv.org/abs/1504.08083){: target="_blank" rel="noopener noreferrer"} + +Feb 18 +: **Dis 5**{: #dis-5 .label .label-blue target="_blank" rel="noopener noreferrer" } **P3 & SAM2** + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/2yZAs5){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– Ren et al., Faster R-CNN](https://arxiv.org/abs/1506.01497){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– cs231n Training II](https://cs231n.github.io/neural-networks-3/){: target="_blank" rel="noopener noreferrer"} + + + + +Feb 19 +: [**Lec 12**]({{ site.baseurl }}/assets/slides/deeprob_lecture_12_object_detection.pdf){: #lec-12 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Object Detection II**]({{ site.baseurl }}/assets/slides/deeprob_lecture_12_object_detection.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/Ztup0G){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Redmon, Darknet](https://pjreddie.com/darknet/){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Jia et al., Caffe](https://arxiv.org/abs/1408.5093){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– He et al., Mask R-CNN](https://arxiv.org/abs/1703.06870){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– Abadi et al., TensorFlow](https://arxiv.org/abs/1603.04467){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Paszke et al., PyTorch](https://arxiv.org/abs/1912.01703){: target="_blank" rel="noopener noreferrer"} + -Feb 17 -: [**Dis 7**](/assets/slides/deeprob_discussion_07.pdf){: #dis-7 .label .label-blue target="_blank" rel="noopener noreferrer" }[Prelude to 3D Perception](/assets/slides/deeprob_discussion_07.pdf){: target="_blank" rel="noopener noreferrer"} - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/P5dkSYbsXQ8){: target="_blank" rel="noopener noreferrer"} diff --git a/_modules/week-08.md b/_modules/week-08.md index 17c2f21..e4ac9d4 100644 --- a/_modules/week-08.md +++ b/_modules/week-08.md @@ -1,19 +1,33 @@ --- -title: Week 8; 3D Perception +title: Week 8 --- -Feb 21 -: [**Lec 
14**](/assets/slides/deeprob_14_rgbd_architectures.pdf){: #lec-14 .label .label-purple target="_blank" rel="noopener noreferrer" }[RGB-D Architectures](/assets/slides/deeprob_14_rgbd_architectures.pdf){: target="_blank" rel="noopener noreferrer"} - : [πŸ“ƒ Related Papers](/papers/#rgb-d-architectures) -Feb 23 -: [**Lec 15**](/assets/slides/deeprob_15_point_cloud_processing.pdf){: #lec-15 .label .label-purple target="_blank" rel="noopener noreferrer" }[Point Clouds, PointNet, and PointNet++](/assets/slides/deeprob_15_point_cloud_processing.pdf){: target="_blank" rel="noopener noreferrer"} - : [πŸ“ƒ Related Papers](/papers/#pointcloud-processing) +Feb 24 +: [**Lec 13**]({{ site.baseurl }}/assets/slides/deeprob_lecture_13_pose_estimation.pdf){: #lec-13 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Pose Estimation**]({{ site.baseurl }}/assets/slides/deeprob_lecture_13_pose_estimation.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/wHNmay){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Xiang et al., PoseCNN](https://arxiv.org/abs/1711.00199){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Mildenhall et al., NeRF](https://arxiv.org/abs/2003.08934){: target="_blank" rel="noopener noreferrer"} + + + +Feb 25 +: [**Dis 6**]({{ site.baseurl }}/assets/slides/deeprob_discussion_06.pdf){: #dis-6 .label .label-blue target="_blank" rel="noopener noreferrer" } [**How to Read Research Papers**]({{ site.baseurl }}/assets/slides/deeprob_discussion_06.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/3EE1M0){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Vannevar Bush, Science The Endless Frontier](https://www.nsf.gov/od/lpa/nsf50/vbush1945.htm){: target="_blank" rel="noopener noreferrer"} + + + + +Feb 26 +: [**Lec 14**]({{ site.baseurl }}/assets/slides/deeprob_lecture_14_final_project_showcase.pdf){: #lec-14 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Final Project Topics**]({{ site.baseurl }}/assets/slides/deeprob_lecture_14_final_project_showcase.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/6Lj0um){: target="_blank" rel="noopener noreferrer"} + -Feb 24 -: [**Dis 8**](/assets/slides/deeprob_discussion_08.pdf){: #dis-8 .label .label-blue target="_blank" rel="noopener noreferrer" }[Prelude to Rigid Body Objects](/assets/slides/deeprob_discussion_08.pdf){: target="_blank" rel="noopener noreferrer"} - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/4jINQitl4sY){: target="_blank" rel="noopener noreferrer"} \ No newline at end of file diff --git a/_modules/week-08b.md b/_modules/week-08b.md index 97ea187..b8823fe 100644 --- a/_modules/week-08b.md +++ b/_modules/week-08b.md @@ -2,11 +2,15 @@ title: Spring Break --- -Feb 28 + +Mar 3 : **Break**{: .label .bg-grey-dk-000 } -Mar 2 +Mar 4 : **Break**{: .label .bg-grey-dk-000 } -Mar 3 +Mar 5 : **Break**{: .label .bg-grey-dk-000 } + +Mar 9 +: [**Project 3 due**]({{ site.baseurl }}/projects/project3/){: .label .label-red } \ No newline at end of file diff --git a/_modules/week-09.md b/_modules/week-09.md index 9d20735..5b37409 100644 --- 
a/_modules/week-09.md +++ b/_modules/week-09.md @@ -1,20 +1,31 @@ --- -title: Week 9; Rigid Body Objects +title: Week 9 --- -Mar 7 -: [**Lec 16**](/assets/slides/deeprob_16_objects.pdf){: #lec-16 .label .label-purple target="_blank" rel="noopener noreferrer" }[Object Pose, Geometry, SDF, Implicit Surfaces](/assets/slides/deeprob_16_objects.pdf){: target="_blank" rel="noopener noreferrer"} - : [πŸ“ƒ Related Papers](/papers/#object-pose-geometry-sdf-implicit-surfaces) + -Mar 9 -: [**Lec 17**](/assets/slides/deeprob_17_object_descriptors.pdf){: #lec-17 .label .label-purple target="_blank" rel="noopener noreferrer" }[Dense Descriptors, Category-level Representations](/assets/slides/deeprob_17_object_descriptors.pdf){: target="_blank" rel="noopener noreferrer"} - : [πŸ“ƒ Related Papers](/papers/#dense-descriptors-category-level-representations) - Mar 10 -: [**Dis 9**](/assets/slides/deeprob_discussion_09.pdf){: #dis-9 .label .label-blue target="_blank" rel="noopener noreferrer" }[Prelude to Object Tracking](/assets/slides/deeprob_discussion_09.pdf){: target="_blank" rel="noopener noreferrer" } - : [![](/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://youtu.be/45sKAir2XnI){: target="_blank" rel="noopener noreferrer"} +: [**Lec 15**]({{ site.baseurl }}/assets/slides/deeprob_lecture_15_deep_learning_software.pdf){: #lec-15 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Deep Learning Software**]({{ site.baseurl }}/assets/slides/deeprob_lecture_15_deep_learning_software.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/pKXsWc){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Xiang, PoseCNN](https://arxiv.org/pdf/1711.00199.pdf){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– Karpathy, The Unreasonable Effectiveness of RNNs](http://karpathy.github.io/2015/05/21/rnn-effectiveness/){: target="_blank" rel="noopener noreferrer"} + : [πŸ“– Wang, DenseFusion](https://arxiv.org/pdf/1901.04780.pdf){: target="_blank" rel="noopener noreferrer"} + + + + +Mar 11 +: [**Dis 7**]({{ site.baseurl }}/assets/slides/deeprob_discussion_07.pdf){: #dis-7 .label .label-blue target="_blank" rel="noopener noreferrer" } [**Midterm Review**]({{ site.baseurl }}/assets/slides/deeprob_discussion_07.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/vJypEU){: target="_blank" rel="noopener noreferrer"} :   - : [πŸ“– Olah & Carter, Attention and Augmented RNNs](https://distill.pub/2016/augmented-rnns/){: target="_blank" rel="noopener noreferrer"} + + +Mar 12 +: **Lec 16**{: #lec-16 .label .label-purple target="_blank" rel="noopener noreferrer" } **Midterm** + + +Mar 16 +: [**Project 4 out**]({{ site.baseurl }}/projects/project4/){: .label .label-yellow } +:   \ No newline at end of file diff --git a/_modules/week-10.md b/_modules/week-10.md index 7c9832e..4d2b05e 100644 --- a/_modules/week-10.md +++ b/_modules/week-10.md @@ -1,21 +1,39 @@ --- -title: Week 10; Object Tracking +title: Week 10 --- -Mar 14 -: [**Lec 18**](/assets/slides/deeprob_18_object_tracking.pdf){: #lec-18 .label .label-purple target="_blank" rel="noopener noreferrer" }[Recurrent Networks, Object Tracking](/assets/slides/deeprob_18_object_tracking.pdf){: target="_blank" rel="noopener noreferrer"} - : [πŸ“ƒ Related 
Papers](/papers/#recurrent-networks-and-object-tracking) -: [**Quiz 9**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} [**Project 3 due**{: .label .label-red }](/projects/project3/){: .project-link} + + +Mar 17 +: [**Lec 17**]({{ site.baseurl }}/assets/slides/deeprob_lecture_16_sequences.pdf){: #lec-17 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Sequences**]({{ site.baseurl }}/assets/slides/deeprob_lecture_16_sequences.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/E36OWG){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Andrej Karpathy, Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Lena Voita, NLP Course For You](https://lena-voita.github.io/nlp_course.html){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Vaswani, Attention Is All You Need](https://arxiv.org/pdf/1706.03762.pdf){: target="_blank" rel="noopener noreferrer"} + + + +Mar 18 +: **Dis 8**{: #dis-8 .label .label-blue target="_blank" rel="noopener noreferrer" } **Generative Models** +:   + + + +Mar 19 +: [**Lec 18**]({{ site.baseurl }}/assets/slides/deeprob_lecture_17_attention.pdf){: #lec-18 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Attention, Transformer**]({{ site.baseurl }}/assets/slides/deeprob_lecture_17_attention.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/qQeP8k){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Dosovitskiy, ViT](https://arxiv.org/pdf/2010.11929.pdf){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Carion, DETR](https://arxiv.org/pdf/2005.12872.pdf){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Xie, SegFormer](https://arxiv.org/pdf/2105.15203.pdf){: target="_blank" rel="noopener noreferrer"} -Mar 16 -: [**Lec 19**](/assets/slides/deeprob_19_visual_odometry.pdf){: #lec-19 .label .label-purple target="_blank" rel="noopener noreferrer" }[Visual Odometry](/assets/slides/deeprob_19_visual_odometry.pdf){: target="_blank" rel="noopener noreferrer"} - : [πŸ“ƒ Related Papers](/papers/#visual-odometry-and-localization) -: [**Project 4 out**{: .label .label-yellow }](/projects/project4/){: .project-link} -Mar 17 -: **Dis 10**{: #dis-10 .label .label-blue }Prelude to Scene-Level Representations -: [**Final Project Checkins**{: .label .label-grey }](https://docs.google.com/spreadsheets/d/1hOdZyFN_mxRF0NCV8Rj6NLwLIk3tyXM84noFwGmp_MI/edit?usp=sharing){: .project-link target="_blank" rel="noopener noreferrer" } diff --git a/_modules/week-11.md b/_modules/week-11.md index e10c670..d570b74 100644 --- a/_modules/week-11.md +++ b/_modules/week-11.md @@ -1,20 +1,31 @@ --- -title: Week 11; Scene-Level Representations +title: Week 11 --- -Mar 21 -: [**Lec 20**](/assets/slides/deeprob_20_graph_representations.pdf){: #lec-20 .label .label-purple target="_blank" rel="noopener noreferrer" }[Semantic Scene Graphs](/assets/slides/deeprob_20_graph_representations.pdf){: target="_blank" rel="noopener noreferrer" } - : [πŸ“ƒ Related Papers](/papers/#semantic-scene-graphs-and-explicit-representations) -: [**Quiz 10**{: .label .label-orange 
}](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} [**Final Project Checkins**{: .label .label-grey }](https://docs.google.com/spreadsheets/d/1hOdZyFN_mxRF0NCV8Rj6NLwLIk3tyXM84noFwGmp_MI/edit?usp=sharing){: .project-link target="_blank" rel="noopener noreferrer" } +Mar 24 +: [**Lec 19**]({{ site.baseurl }}/assets/slides/deeprob_lecture_18_vision_transformers.pdf){: #lec-19 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Vision Transformers**]({{ site.baseurl }}/assets/slides/deeprob_lecture_18_vision_transformers.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/Cxy7e5){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Devlin, BERT](https://arxiv.org/pdf/1810.04805.pdf){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Radford, CLIP](https://arxiv.org/abs/2103.00020){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Kerr, LERF](https://www.lerf.io){: target="_blank" rel="noopener noreferrer"} -Mar 23 -: [**Lec 21**](/assets/slides/deeprob_21_implicit_representations.pdf){: #lec-21 .label .label-purple target="_blank" rel="noopener noreferrer" }[Neural Radiance Fields](/assets/slides/deeprob_21_implicit_representations.pdf){: target="_blank" rel="noopener noreferrer" } - : [πŸ“ƒ Related Papers](/papers/#neural-radiance-fields-and-implicit-representations) -: [**Quiz 11**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} [**Final Project Checkins**{: .label .label-grey }](https://docs.google.com/spreadsheets/d/1hOdZyFN_mxRF0NCV8Rj6NLwLIk3tyXM84noFwGmp_MI/edit?usp=sharing){: .project-link target="_blank" rel="noopener noreferrer" } +Mar 25 +: **Dis 9**{: #dis-9 .label .label-blue } **P4 Help** +:   -Mar 24 -: **Dis 11**{: #dis-11 .label .label-blue }Prelude to Data -: [**Project 4 due**{: .label .label-red }](/projects/project4/){: .project-link} [**Final Project Checkins**{: .label .label-grey }](https://docs.google.com/spreadsheets/d/1hOdZyFN_mxRF0NCV8Rj6NLwLIk3tyXM84noFwGmp_MI/edit?usp=sharing){: .project-link target="_blank" rel="noopener noreferrer" } \ No newline at end of file + +Mar 26 +: [**Lec 20**]({{ site.baseurl }}/assets/slides/deeprob_lecture_19_transformers.pdf){: #lec-20 .label .label-purple target="_blank" rel="noopener noreferrer" } [**More Transformers**]({{ site.baseurl }}/assets/slides/deeprob_lecture_19_transformers.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/n5EkNf){: target="_blank" rel="noopener noreferrer"} +:   + + +Mar 30 +: [**Project 4 due**]({{ site.baseurl }}/projects/project4/){: .label .label-red } +:   diff --git a/_modules/week-12.md b/_modules/week-12.md index dec95dd..bcd6e05 100644 --- a/_modules/week-12.md +++ b/_modules/week-12.md @@ -1,17 +1,28 @@ --- -title: Week 12; Data +title: Week 12 --- -Mar 28 -: **Lec 22**{: #lec-22 .label .label-purple }Datasets for Perception - : [πŸ“ƒ Related Papers](/papers/#datasets) -: [**Quiz 12**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} [**Final Project Checkins**{: .label .label-grey 
}](https://docs.google.com/spreadsheets/d/1hOdZyFN_mxRF0NCV8Rj6NLwLIk3tyXM84noFwGmp_MI/edit?usp=sharing){: .project-link target="_blank" rel="noopener noreferrer" } -Mar 30 -: **Lec 23**{: #lec-23 .label .label-purple }Self-Supervised Learning - : [πŸ“ƒ Related Papers](/papers/#self-supervised-learning) -: [**Quiz 13**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} - Mar 31 -: **Dis 12**{: #dis-12 .label .label-blue }Prelude to Perception for Manipulation +: [**Lec 21**]({{ site.baseurl }}/assets/slides/deeprob_lecture_20_3d_vision.pdf){: #lec-21 .label .label-purple target="_blank" rel="noopener noreferrer" } [**3D Vision**]({{ site.baseurl }}/assets/slides/deeprob_lecture_20_3d_vision.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/RAbxWo){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Qi et al., PointNet](https://arxiv.org/abs/1612.00593){: target="_blank" rel="noopener noreferrer"} + + +Apr 1 +: **Dis 10**{: #dis-10 .label .label-blue } **Discussion** +:   + + + +Apr 2 +: [**Lec 22**]({{ site.baseurl }}/assets/slides/deeprob_lecture_21_nerf_3dgs.pdf){: #lec-22 .label .label-purple target="_blank" rel="noopener noreferrer" } [**NeRF & 3D Gaussian Splatting**]({{ site.baseurl }}/assets/slides/deeprob_lecture_21_nerf_3dgs.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/if0odM){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Mildenhall et al., NeRF](https://arxiv.org/abs/2003.08934){: target="_blank" rel="noopener noreferrer"} +:   + : [πŸ“– Kerbl et al., 3DGS](https://arxiv.org/abs/2308.04079){: target="_blank" rel="noopener noreferrer"} + + diff --git a/_modules/week-13.md b/_modules/week-13.md index 0733d3c..4393aa7 100644 --- a/_modules/week-13.md +++ b/_modules/week-13.md @@ -1,17 +1,26 @@ --- -title: Week 13; Perception for Manipulation +title: Week 13 --- -Apr 4 -: **Lec 24**{: #lec-24 .label .label-purple }Grasp Pose Detection - : [πŸ“ƒ Related Papers](/papers/#grasp-pose-detection) -: [**Quiz 14**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} -Apr 6 -: **Lec 25**{: #lec-25 .label .label-purple }Tactile Perception for Grasping and Manipulation - : [πŸ“ƒ Related Papers](/papers/#tactile-perception-for-grasping-and-manipulation) - Apr 7 -: **Dis 13**{: #dis-13 .label .label-blue }Prelude to Transformer Architectures +: [**Lec 23**]({{ site.baseurl }}/assets/slides/deeprob_lecture_22_visualizing.pdf){: #lec-23 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Visualizing Models**]({{ site.baseurl }}/assets/slides/deeprob_lecture_22_visualizing.pdf){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/Dhl2ni){: target="_blank" rel="noopener noreferrer"} +:   + + +Apr 8 +: **Dis 11**{: #dis-11 .label .label-blue } **Final Project Help** +:   + + +Apr 9 +: **Lec 24**{: #lec-24 .label .label-purple target="_blank" rel="noopener noreferrer" } **Multi-Sensor Fusion** + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } 
**Recording**](https://leccap.engin.umich.edu/leccap/player/r/B9V1rL){: target="_blank" rel="noopener noreferrer"} +:   + + + + diff --git a/_modules/week-14.md b/_modules/week-14.md index 252c08f..87ca402 100644 --- a/_modules/week-14.md +++ b/_modules/week-14.md @@ -1,16 +1,25 @@ --- -title: Week 14; More Frontiers +title: Week 14 --- -Apr 11 -: **Lec 26**{: #lec-26 .label .label-purple } Transformer Architectures - : [πŸ“ƒ Related Papers](/papers/#pre-training-for-robot-manipulation-and-transformer-architectures) -: [**Quiz 15**{: .label .label-orange }](https://www.gradescope.com/courses/480760){: .project-link target="_blank" rel="noopener noreferrer"} - -Apr 13 -: **Lec 27**{: #lec-27 .label .label-purple }More Frontiers - : [πŸ“ƒ Related Papers](/papers/#more-frontiers) Apr 14 -: **Dis 14**{: #dis-14 .label .label-blue }Remaining Challenges and Limitations +: [**Lec 25**](https://sites.google.com/view/kavehfathian/home){: #lec-25 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Invited Speaker: Prof. Kaveh Fathian**—SLAM in the Era of AI](https://sites.google.com/view/kavehfathian/home){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/EG1ZvT){: target="_blank" rel="noopener noreferrer"} +:   + + +Apr 15 +: **Dis 12**{: #dis-12 .label .label-blue } **MemryX AI chip demo** + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/BrmaNW){: target="_blank" rel="noopener noreferrer"} +:   + + + + +Apr 16 +: [**Lec 26**](https://karthikdesingh.com){: #lec-26 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Invited Speaker: Prof. Karthik Desingh**—Object Assembly, a Spatial-Geometric Reasoning Pathway to Physical Intelligence](https://karthikdesingh.com){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/pVq0bl){: target="_blank" rel="noopener noreferrer"} +:   + diff --git a/_modules/week-15.md b/_modules/week-15.md index 2e57015..320243a 100644 --- a/_modules/week-15.md +++ b/_modules/week-15.md @@ -2,9 +2,20 @@ title: Week 15 --- -Apr 18 -: **Lec 28**{: #lec-28 .label .label-purple }Project Demos:Course Closure - : [πŸ“– R. P. Feynman, Cargo Cult Science](https://calteches.library.caltech.edu/51/2/CargoCult.htm){: target="_blank" rel="noopener noreferrer"} + Apr 21 -: [**Final Project Due**{: .label .label-red }](/projects/finalproject/){: .project-link} +: [**Lec 27**](https://glenchou.github.io){: #lec-27 .label .label-purple target="_blank" rel="noopener noreferrer" } [**Invited Speaker: Prof. 
Glen Chou**—Toward End-to-End Reliable Learning-based Control for Robot Autonomy](https://glenchou.github.io){: target="_blank" rel="noopener noreferrer"} + : [![]({{ site.baseurl }}/assets/logos/yt_icon_rgb.png){: .module-logo } **Recording**](https://leccap.engin.umich.edu/leccap/player/r/lfcLlj){: target="_blank" rel="noopener noreferrer"} +:   + + +Apr 22 +: **Dis 13**{: #dis-13 .label .label-blue } **Project Showcase** +:   + + +Apr 23 +: **Lec 28**{: #lec-28 .label .label-purple } **No Class** +:   + diff --git a/_modules/week-16.md b/_modules/week-16.md new file mode 100644 index 0000000..5f2fe71 --- /dev/null +++ b/_modules/week-16.md @@ -0,0 +1,10 @@ +--- +title: Week 16 +--- + +Apr 28 +: **No Class** +: [**Final Project Due**]({{ site.baseurl }}/projects/finalproject/){: .label .label-red } + + + diff --git a/_sass/custom/custom.scss b/_sass/custom/custom.scss index eec3301..3491788 100644 --- a/_sass/custom/custom.scss +++ b/_sass/custom/custom.scss @@ -11,6 +11,7 @@ @import 'button'; @import 'layout'; @import 'colors'; +@import 'title_anim'; // Overrides code { @@ -71,47 +72,73 @@ summary { } .label-orange { - background-color: #f58842; + background-color: #f58842 !important; } .label-lecture { - background-color: $purple-000; + background-color: $purple-000 !important; } .label-seminar { - background-color: $purple-200; + background-color: $purple-200 !important; } .label-grey { - background-color: $grey-dk-100; + background-color: $grey-dk-100 !important; } +// .video-container { +// position: relative; +// overflow: hidden; +// height: 0; +// padding-bottom: 56.25%; /* creates a 16:9 aspect ratio */ +// } + +// .video-container iframe, +// .video-container embed { +// position: absolute; +// top: 0; +// left: 0; +// width: 100%; +// height: 100%; +// max-width: 100%; +// } + +// /* And set the max-width of the parent element */ +// .video-wrap { +// width: 100%; +// max-width: 560px; +// } + +.video-wrap { + display: flex; + justify-content: center; + flex-wrap: wrap; +} + .video-container { + width: calc(50% - 20px); + margin: 10px; position: relative; - overflow: hidden; - height: 0; - padding-bottom: 56.25%; /* creates a 16:9 aspect ratio */ + overflow: hidden; /* Hide overflow content */ +} + +.video-container:before { + content: ""; + display: block; + padding-top: 56.25%; /* 16:9 aspect ratio */ } -.video-container iframe, -.video-container embed { +.video-container iframe { position: absolute; top: 0; left: 0; width: 100%; height: 100%; - max-width: 100%; } -/* And set the max-width of the parent element */ -.video-wrap { - width: 100%; - max-width: 560px; -} - - .project-title { margin-top: 3% !important; text-align: center; @@ -137,9 +164,139 @@ .center-image { text-align: center; justify-content: center; + // max-width: 100%; /* Constrain the width of the container */ + // margin: 0 auto; /* Center the container horizontally */ + // overflow-x: auto; /* Allow horizontal scrolling if needed */ } +// .center-image img { +// max-width: 50%; /* Constrain the width of the images */ +// height: auto; /* Maintain aspect ratio */ +// margin: 0 10px; /* Adjust spacing between images */ +// } + .project-links { text-align: center; } + +.banner-container { + margin-top: -1rem; + margin-right: -1rem; + width: auto; + position: relative; + text-align: left; + text-shadow: 0 0 1rem #FFFFFF, 0 0 1rem #FFFFFF, 0 0 1rem #FFFFFF, 0 0 1rem #FFFFFF, 0 0 1rem #FFFFFF, 0 0 1rem #FFFFFF; +} + +.banner-info { + position: absolute; + top: 5%; +} + +.banner-sub-info { + 
position: absolute; + bottom: 7%; +} + +.banner-title { + height: 8vw; + margin-bottom: 1.25vw; + font-weight: 600; + color: $grey-dk-300; +} + +.banner-subtitle { + height: 5vw; + font-weight: 400; + margin-bottom: 6vw; + color: $grey-dk-250; +} + +.banner-text { + font-size: 2.75vw; + font-weight: 300; +} + + + +@media (min-width: 50rem) { + + .banner-container { + margin-top: -2rem; + margin-right: -2rem; + } + + .banner-title { + height: 7.5vw; + margin-bottom: 0.75vw; + } + + .banner-subtitle { + height: 4.25vw; + } + + .banner-text { + font-size: 2.25vw; + } +} + + +.site-sub-logo { + flex-basis: 0%; + display: none; + + img { + width: 100%; + } + + @include mq(md) { + display: flex; + height: 100%; + align-items: center; + flex-basis: 68%; + } +} + + + + +.site-logo { + flex-basis: 100%; + + @include mq(md) { + flex-basis: 32%; + } +} + + + + +.hr-text { + border: 0; + @include fs-6; + font-weight: 500; + position: relative; + text-align: center; + color: $body-heading-color; + line-height: 0em; + @include mq(sm) { + line-height: 0em; + } +} + +.hr-text::before { + content: ""; + position: absolute; + left: 0; + top: 50%; + width: 100%; +} + +.hr-text::after { + background-color: white; + content: attr(content); + padding: 0 7px; + position: relative; +} + diff --git a/_sass/custom/footer.scss b/_sass/custom/footer.scss index a73bbd3..1db2e26 100644 --- a/_sass/custom/footer.scss +++ b/_sass/custom/footer.scss @@ -5,5 +5,5 @@ .footer-img { - height: 80px; + height: 95px; } \ No newline at end of file diff --git a/_sass/custom/schedule.scss b/_sass/custom/schedule.scss index be303ef..0813bc1 100644 --- a/_sass/custom/schedule.scss +++ b/_sass/custom/schedule.scss @@ -101,14 +101,21 @@ background-color: $purple-000; } - &.huijie-s-office-hours { + &.edmond-s-office-hours { background-color: $blue-000; } &.anthony-s-office-hours { background-color: #fab450; } - &.jiyue-s-office-hours { + &.xiaoxiao-s-off-hrs { background-color: $red-000; } + + &.yifu-s-off-hrs { + background-color: #6a2e73; + } + &.dalton-s-off-hrs { + background-color: #21c1d8; + } } } diff --git a/_sass/custom/staffer.scss b/_sass/custom/staffer.scss index 59199b0..09dff59 100644 --- a/_sass/custom/staffer.scss +++ b/_sass/custom/staffer.scss @@ -1,19 +1,8 @@ .staff-row { - - -webkit-column-count: 1; -moz-column-count: 1; column-count: 1; - display: inline; - - @include mq(lg) { - -webkit-column-count: 2; - -moz-column-count: 2; - column-count: 2; - display: flex; - } - - + display: inline; } .staffer-info { @@ -24,17 +13,31 @@ } .staff-column { + flex-wrap: wrap; width: 100%; + + -webkit-column-count: 1; + -moz-column-count: 1; + column-count: 1; + display: inline; + + @include mq(lg) { + -webkit-column-count: 2; + -moz-column-count: 2; + column-count: 2; + display: flex; + } } .staffer { display: flex; - flex-basis: 50%; + flex: 1 1 calc(50% - 2 * $sp-4); + max-width: calc(50% - 2 * $sp-4); margin: $sp-4; .staffer-image { border-radius: 50%; - height: 135px; + height: 160px; margin-right: $sp-4; } diff --git a/_sass/custom/title_anim.scss b/_sass/custom/title_anim.scss new file mode 100644 index 0000000..5eefb3b --- /dev/null +++ b/_sass/custom/title_anim.scss @@ -0,0 +1,86 @@ + +$graf-palt: (#9f0bbd, #27bdc2, #ed55c9, #d402c9, #4b0180, #27bdc2, #ce11f5, #cf0e95, #e8647a, #eb83b0, #469dfa, #803cb0); +$graf-back: (#4f045e, #197a7d, #a8398e, #800178, #340059, #197a7d, #7c0794, #82075d, #9c404f, #9e5172, #2a65a3, #471f63); + +$title-svg-deep-count: 4; + +@for $i from 1 through $title-svg-deep-count { + 
#title-svg-DeepRob-#{$i} { + fill: nth($graf-palt, $i); + stroke: nth($graf-back, $i); + stroke-width: 0.45; + + } +} + +$title-svg-DL-count: 12; + +@for $i from 1 through $title-svg-DL-count { + #title-svg-DLxRP-DL-#{$i} { + fill: nth($graf-palt, $i); + stroke: nth($graf-back, $i); + stroke-width: 0.5; + animation: grafAnimate 7s linear 0s infinite alternate-reverse; + } +} + +@keyframes grafAnimate { + 0%{ + stroke-dasharray: 0% 20%; + } + 20%{ + stroke-dasharray: 10% 20%; + } + 80%{ + stroke-dasharray: 20% 10%; + } + 100%{ + stroke-dasharray: 20% 0%; + } +} + + + +$title-svg-rob-count: 3; +$title-svg-rob-rand: 5+random(32), 5+random(32), 5+random(32); +$title-svg-rob-randd: random(24), random(24), random(24); + +@for $i from 1 through $title-svg-rob-count { + #title-svg-DeepRob-#{$i + 4} { + + } +} + +@keyframes slowAnimate { + 0%{ + stroke-dasharray: 0 5%; + stroke-dashoffset: 10%; + } + 100%{ + stroke-dasharray: 5% 0; + stroke-dashoffset: -10%; + } +} + + +$title-svg-rp-count: 15; +$title-svg-rp-rand: 4+random(17), 4+random(17), 4+random(17), 4+random(17), 4+random(17), 4+random(17), 4+random(17), 4+random(17), 4+random(17), 4+random(17), 4+random(17), 4+random(17), 4+random(17), 4+random(17), 4+random(17); + + + +@for $i from 1 through $title-svg-rp-count { + #title-svg-DLxRP-RP-#{$i} { + animation: fastAnimate nth($title-svg-rp-rand, $i) + s infinite alternate-reverse; + } +} + +@keyframes fastAnimate { + 0%{ + stroke-dasharray: 0 40%; + stroke-dashoffset: 30%; + } + 100%{ + stroke-dasharray: 40% 0; + stroke-dashoffset: -30%; + } +} \ No newline at end of file diff --git a/_schedules/weekly.md b/_schedules/weekly.md index 59421f4..e449016 100644 --- a/_schedules/weekly.md +++ b/_schedules/weekly.md @@ -1,5 +1,6 @@ --- timeline: + - '8:00 AM' - '9:00 AM' - '9:30 AM' - '10:00 AM' @@ -20,54 +21,61 @@ timeline: - '5:30 PM' - '6:00 PM' schedule: - - name: Monday 03/27 + - name: Mon. 04/15 events: - - name: Huijie's Office Hours - start: 2:30 PM - end: 3:30 PM - zoom: Zoom Link - - name: Tuesday 03/28 + - name: Anthony's Office Hours + start: 8:00 AM + end: 10:00 AM + location: FRB 2000 + zoom: Zoom Link (deeprob) + - name: Yifu's Off. Hrs. + start: 3:30 PM + end: 4:30 PM + location: BBB First Floor + zoom: Zoom Link + - name: Dalton's Off. Hrs. + start: 4:30 PM + end: 5:30 PM + location: FRB 2000 + zoom: Zoom Link (deeprob) + - name: Tue. 04/16 events: - - name: Huijie's Office Hours - start: 2:00 PM - end: 3:00 PM - zoom: Zoom Link - - name: Lecture 22 + - name: Lecture 25 start: 3:00 PM end: 4:30 PM - location: 1060 FMCRB - zoom: Zoom Link + location: COOL G906 + zoom: Zoom Link (deeprob) class: lecture - - name: Final Project Checkins + - name: Xiaoxiao's Off. Hrs. start: 4:30 PM - end: 5:30 PM - location: 1060 FMCRB - - name: Wednesday 03/29 + end: 5:15 PM + location: COOL G906 (hallway) + - name: Wed. 04/17 events: - - name: Thursday 03/30 + - name: Edmond's Office Hours + start: 1:30 PM + end: 3:30 PM + location: FRB 3320 + zoom: Zoom Link (deeprob) + - name: Discussion 14 + start: 3:30 PM + end: 5:30 PM + location: EECS 1311 + zoom: Zoom Link (deeprob) + class: discussion + - name: Thr. 04/18 events: - - name: Jiyue's Office Hours - start: 1:00 PM - end: 3:00 PM - location: 2320 FMCRB - zoom: Zoom Link - - name: Lecture 23 + - name: Lecture 26 start: 3:00 PM end: 4:30 PM - location: 1060 FMCRB - zoom: Zoom Link + location: COOL G906 + zoom: Zoom Link (deeprob) class: lecture - - name: Friday 03/31 - events: - - name: Discussion 12 + - name: Xiaoxiao's Off. Hrs. 
start: 4:30 PM - end: 5:30 PM - location: 1060 FMCRB - zoom: Zoom Link - class: discussion + end: 5:15 PM + location: COOL G906 (hallway) + - name: Fri. 04/19 + events: + --- - - \ No newline at end of file diff --git a/_staffers/adi.md b/_staffers/adi.md new file mode 100644 index 0000000..7da59ca --- /dev/null +++ b/_staffers/adi.md @@ -0,0 +1,8 @@ +--- +name: Adi Balaji +role: Instructional Assistant +email: advaithb@umich.edu +website: https://adi-balaji.github.io/portfolio/ +photo: images/adi.webp +order: 1 +--- diff --git a/_staffers/anthony.md b/_staffers/anthony.md index 852aa5a..da58d8e 100644 --- a/_staffers/anthony.md +++ b/_staffers/anthony.md @@ -1,8 +1,8 @@ --- name: Anthony Opipari -role: Research Associate +role: Instructor email: topipari@umich.edu website: https://topipari.com -photo: anthony.webp - +photo: images/anthony.webp +order: 2 --- diff --git a/_staffers/cale.md b/_staffers/cale.md new file mode 100644 index 0000000..4adf137 --- /dev/null +++ b/_staffers/cale.md @@ -0,0 +1,8 @@ +--- +name: Cale Colony +role: Graduate Student Instructor +email: ccolony@umich.edu +website: https://www.linkedin.com/in/cale-colony-bbbb577/ +photo: images/cale.webp +order: 1 +--- diff --git a/_staffers/chad.md b/_staffers/chad.md index ce9feb6..2f2e770 100644 --- a/_staffers/chad.md +++ b/_staffers/chad.md @@ -1,9 +1,8 @@ --- name: Chad Jenkins -role: Instructor +role: Advising Faculty email: ocj@umich.edu website: https://ocj.name/ -photo: chad.webp -order: 2 +photo: images/chad.webp --- diff --git a/_staffers/jason.md b/_staffers/jason.md new file mode 100644 index 0000000..7efc2b1 --- /dev/null +++ b/_staffers/jason.md @@ -0,0 +1,7 @@ +--- +name: Jason Brown +role: Instructional Assistant +email: jaybrow@umich.edu +photo: images/jason.webp +order: 3 +--- diff --git a/_staffers/meha.md b/_staffers/meha.md new file mode 100644 index 0000000..cedfa49 --- /dev/null +++ b/_staffers/meha.md @@ -0,0 +1,7 @@ +--- +name: Meha Goyal +role: Instructional Assistant +email: mehag@umich.edu +photo: images/meha.webp +order: 4 +--- diff --git a/_staffers/sydney.md b/_staffers/sydney.md new file mode 100644 index 0000000..69f5e59 --- /dev/null +++ b/_staffers/sydney.md @@ -0,0 +1,8 @@ +--- +name: Sydney Belt +role: Instructional Assistant +email: sydbelt@umich.edu +website: https://www.linkedin.com/in/sydney-belt-a0690b252/ +photo: images/sydney.webp +order: 2 +--- diff --git a/_staffers/xiaoxiao.md b/_staffers/xiaoxiao.md index 541e527..4669b0b 100644 --- a/_staffers/xiaoxiao.md +++ b/_staffers/xiaoxiao.md @@ -3,7 +3,6 @@ name: Xiaoxiao Du role: Instructor email: xiaodu@umich.edu website: https://xiaoxiaodu.net -photo: xiaoxiao.webp +photo: images/xiaoxiao.webp order: 1 - --- diff --git a/assets/images/adi.webp b/assets/images/adi.webp new file mode 100644 index 0000000..57ed382 Binary files /dev/null and b/assets/images/adi.webp differ diff --git a/assets/images/banner.jpg b/assets/images/banner.jpg new file mode 100644 index 0000000..a9e4fca Binary files /dev/null and b/assets/images/banner.jpg differ diff --git a/assets/images/cale.webp b/assets/images/cale.webp new file mode 100644 index 0000000..8d0d762 Binary files /dev/null and b/assets/images/cale.webp differ diff --git a/assets/images/jason.webp b/assets/images/jason.webp new file mode 100644 index 0000000..47258d3 Binary files /dev/null and b/assets/images/jason.webp differ diff --git a/assets/images/meha.webp b/assets/images/meha.webp new file mode 100644 index 0000000..fdfba30 Binary files /dev/null and b/assets/images/meha.webp differ 
diff --git a/assets/images/multi_head_attention.webp b/assets/images/multi_head_attention.webp new file mode 100644 index 0000000..2035d94 Binary files /dev/null and b/assets/images/multi_head_attention.webp differ diff --git a/assets/images/posecnn_arch.png b/assets/images/posecnn_arch.png deleted file mode 100644 index 4bc8f37..0000000 Binary files a/assets/images/posecnn_arch.png and /dev/null differ diff --git a/assets/images/posecnn_hough.png b/assets/images/posecnn_hough.png deleted file mode 100644 index be56938..0000000 Binary files a/assets/images/posecnn_hough.png and /dev/null differ diff --git a/assets/images/scaled_dot_product_attention.webp b/assets/images/scaled_dot_product_attention.webp new file mode 100644 index 0000000..9894f9d Binary files /dev/null and b/assets/images/scaled_dot_product_attention.webp differ diff --git a/assets/images/sydney.webp b/assets/images/sydney.webp new file mode 100644 index 0000000..af5dd0a Binary files /dev/null and b/assets/images/sydney.webp differ diff --git a/assets/images/vit_arch.webp b/assets/images/vit_arch.webp new file mode 100644 index 0000000..f4c1dcc Binary files /dev/null and b/assets/images/vit_arch.webp differ diff --git a/assets/images/vit_encoder.webp b/assets/images/vit_encoder.webp new file mode 100644 index 0000000..662fbc4 Binary files /dev/null and b/assets/images/vit_encoder.webp differ diff --git a/assets/labs/deeprob_discussion_02_pytorch.ipynb b/assets/labs/deeprob_discussion_02_pytorch.ipynb new file mode 100644 index 0000000..d5e4089 --- /dev/null +++ b/assets/labs/deeprob_discussion_02_pytorch.ipynb @@ -0,0 +1,274 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "PtKvmZx-WmUu" + }, + "outputs": [], + "source": [ + "#@title Insatlling Pyorch\n", + "\n", + "#!pip install torch\n", + "#!pip install torchvision" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "bGU6NwlsXFSt" + }, + "outputs": [], + "source": [ + "#@title Import Dependencies\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torchvision.datasets as dsets\n", + "import torchvision.transforms as transforms\n", + "import matplotlib.pyplot as plt\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "_bNfVLRUYqZA" + }, + "outputs": [], + "source": [ + "#@title Define Hyperparameters\n", + "\n", + "input_size = 784 # img_size = (28,28) ---> 28*28=784 in total\n", + "hidden_size = 500 # number of nodes at hidden layer\n", + "num_classes = 10 # number of output classes discrete range [0,9]\n", + "num_epochs = 20 # number of times which the entire dataset is passed throughout the model\n", + "batch_size = 100 # the size of input data took for one iteration\n", + "lr = 1e-3 # size of step" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "lCsBCXMwbpH5" + }, + "outputs": [], + "source": [ + "#@title Downloading MNIST data\n", + "\n", + "train_data = dsets.MNIST(root = './data', train = True,\n", + " transform = transforms.ToTensor(), download = True)\n", + "\n", + "test_data = dsets.MNIST(root = './data', train = False,\n", + " transform = transforms.ToTensor())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "rfDPBdnYgfGp" + }, + "outputs": [], + "source": [ + "#@title Loading the data\n", + "\n", + "train_gen = torch.utils.data.DataLoader(dataset = 
train_data,\n", + " batch_size = batch_size,\n", + " shuffle = True)\n", + "\n", + "show_gen = torch.utils.data.DataLoader(dataset = test_data,\n", + " batch_size = 1,\n", + " shuffle = True)\n", + "\n", + "test_gen = torch.utils.data.DataLoader(dataset = test_data,\n", + " batch_size = batch_size,\n", + " shuffle = False)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "fL-YXTvghaz_" + }, + "outputs": [], + "source": [ + "#@title Define model class\n", + "\n", + "class Net(nn.Module):\n", + " def __init__(self, input_size, hidden_size, num_classes):\n", + " super(Net,self).__init__()\n", + " self.fc1 = nn.Linear(input_size, hidden_size)\n", + " self.relu = nn.ReLU()\n", + " self.fc2 = nn.Linear(hidden_size, num_classes)\n", + "\n", + " def forward(self,x):\n", + " out = self.fc1(x)\n", + " out = self.relu(out)\n", + " out = self.fc2(out)\n", + " return out" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "-3EPEqbjjfAT" + }, + "outputs": [ + { + "ename": "RuntimeError", + "evalue": "CUDA error: CUDA-capable device(s) is/are busy or unavailable\nCUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1.\nCompile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.\n", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m net \u001b[38;5;241m=\u001b[39m Net(input_size, hidden_size, num_classes)\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mcuda\u001b[38;5;241m.\u001b[39mis_available():\n\u001b[0;32m----> 5\u001b[0m \u001b[43mnet\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/saycan/lib/python3.8/site-packages/torch/nn/modules/module.py:918\u001b[0m, in \u001b[0;36mModule.cuda\u001b[0;34m(self, device)\u001b[0m\n\u001b[1;32m 901\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcuda\u001b[39m(\u001b[38;5;28mself\u001b[39m: T, device: Optional[Union[\u001b[38;5;28mint\u001b[39m, device]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m 902\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Moves all model parameters and buffers to the GPU.\u001b[39;00m\n\u001b[1;32m 903\u001b[0m \n\u001b[1;32m 904\u001b[0m \u001b[38;5;124;03m This also makes associated parameters and buffers different objects. 
So\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 916\u001b[0m \u001b[38;5;124;03m Module: self\u001b[39;00m\n\u001b[1;32m 917\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 918\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43;01mlambda\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/anaconda3/envs/saycan/lib/python3.8/site-packages/torch/nn/modules/module.py:810\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 808\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m recurse:\n\u001b[1;32m 809\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m module \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mchildren():\n\u001b[0;32m--> 810\u001b[0m \u001b[43mmodule\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_apply\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 812\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompute_should_use_set_data\u001b[39m(tensor, tensor_applied):\n\u001b[1;32m 813\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m torch\u001b[38;5;241m.\u001b[39m_has_compatible_shallow_copy_type(tensor, tensor_applied):\n\u001b[1;32m 814\u001b[0m \u001b[38;5;66;03m# If the new tensor has compatible tensor type as the existing tensor,\u001b[39;00m\n\u001b[1;32m 815\u001b[0m \u001b[38;5;66;03m# the current behavior is to change the tensor in-place using `.data =`,\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 820\u001b[0m \u001b[38;5;66;03m# global flag to let the user control whether they want the future\u001b[39;00m\n\u001b[1;32m 821\u001b[0m \u001b[38;5;66;03m# behavior of overwriting the existing tensor or not.\u001b[39;00m\n", + "File \u001b[0;32m~/anaconda3/envs/saycan/lib/python3.8/site-packages/torch/nn/modules/module.py:833\u001b[0m, in \u001b[0;36mModule._apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 829\u001b[0m \u001b[38;5;66;03m# Tensors stored in modules are graph leaves, and we don't want to\u001b[39;00m\n\u001b[1;32m 830\u001b[0m \u001b[38;5;66;03m# track autograd history of `param_applied`, so we have to use\u001b[39;00m\n\u001b[1;32m 831\u001b[0m \u001b[38;5;66;03m# `with torch.no_grad():`\u001b[39;00m\n\u001b[1;32m 832\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[0;32m--> 833\u001b[0m param_applied \u001b[38;5;241m=\u001b[39m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mparam\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 834\u001b[0m should_use_set_data \u001b[38;5;241m=\u001b[39m compute_should_use_set_data(param, param_applied)\n\u001b[1;32m 835\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m should_use_set_data:\n", + "File \u001b[0;32m~/anaconda3/envs/saycan/lib/python3.8/site-packages/torch/nn/modules/module.py:918\u001b[0m, in \u001b[0;36mModule.cuda..\u001b[0;34m(t)\u001b[0m\n\u001b[1;32m 901\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcuda\u001b[39m(\u001b[38;5;28mself\u001b[39m: T, device: Optional[Union[\u001b[38;5;28mint\u001b[39m, device]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m) 
\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[1;32m 902\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124mr\u001b[39m\u001b[38;5;124;03m\"\"\"Moves all model parameters and buffers to the GPU.\u001b[39;00m\n\u001b[1;32m 903\u001b[0m \n\u001b[1;32m 904\u001b[0m \u001b[38;5;124;03m This also makes associated parameters and buffers different objects. So\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 916\u001b[0m \u001b[38;5;124;03m Module: self\u001b[39;00m\n\u001b[1;32m 917\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 918\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_apply(\u001b[38;5;28;01mlambda\u001b[39;00m t: \u001b[43mt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcuda\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m)\n", + "\u001b[0;31mRuntimeError\u001b[0m: CUDA error: CUDA-capable device(s) is/are busy or unavailable\nCUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.\nFor debugging consider passing CUDA_LAUNCH_BLOCKING=1.\nCompile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.\n" + ] + } + ], + "source": [ + "#@title Build the model\n", + "\n", + "net = Net(input_size, hidden_size, num_classes)\n", + "if torch.cuda.is_available():\n", + " net.cuda()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ePLIwvAFj2zH" + }, + "outputs": [], + "source": [ + "#@title Define loss-function & optimizer\n", + "\n", + "loss_function = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam( net.parameters(), lr=lr)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "u75Xa5VckuTH" + }, + "outputs": [], + "source": [ + "#@title Training the model\n", + "\n", + "for epoch in range(num_epochs):\n", + " for i ,(images,labels) in enumerate(train_gen):\n", + " images = (images.view(-1,28*28)).cuda()\n", + " labels = (labels).cuda()\n", + "\n", + " optimizer.zero_grad()\n", + " outputs = net(images)\n", + " loss = loss_function(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if (i+1) % 100 == 0:\n", + " print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'\n", + " %(epoch+1, num_epochs, i+1, len(train_data)//batch_size, loss.data.item()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DTPvMW5jHB9X" + }, + "outputs": [], + "source": [ + "#@title Evaluating the accuracy of the model\n", + "\n", + "correct = 0\n", + "total = 0\n", + "for images,labels in test_gen:\n", + " images = (images.view(-1,28*28)).cuda()\n", + " labels = labels.cuda()\n", + "\n", + " output = net(images)\n", + " _, predicted = torch.max(output,1)\n", + " correct += (predicted == labels).sum()\n", + " total += labels.size(0)\n", + "\n", + "print('Accuracy of the model: %.3f %%' %((100*correct)/(total+1)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jP9BP36Bk2i-" + }, + "source": [ + "# Display one prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6RxE4QFD-Ujh" + }, + "outputs": [], + "source": [ + "for images,labels in show_gen:\n", + " image = images\n", + " images = (images.view(-1,28*28)).cuda()\n", + " output = net(images)\n", + " _, predicted = torch.max(output,1)\n", + " break\n", + "\n", + "print(f\"Predicted = {predicted.item()}\")\n", + "plt.imshow(image.squeeze().cpu().numpy(), 
cmap='gray')" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/assets/labs/deeprob_discussion_03_props.zip b/assets/labs/deeprob_discussion_03_props.zip new file mode 100644 index 0000000..047697b Binary files /dev/null and b/assets/labs/deeprob_discussion_03_props.zip differ diff --git a/assets/logos/DeepRobBlue.svg b/assets/logos/DeepRobBlue.svg new file mode 100644 index 0000000..735cbcf --- /dev/null +++ b/assets/logos/DeepRobBlue.svg @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/assets/logos/Jupyter.svg b/assets/logos/Jupyter.svg new file mode 100644 index 0000000..ab25508 --- /dev/null +++ b/assets/logos/Jupyter.svg @@ -0,0 +1,90 @@ + +Group.svg +Created using Figma 0.90 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/assets/logos/external/JAX.svg b/assets/logos/external/JAX.svg new file mode 100644 index 0000000..93dc07b --- /dev/null +++ b/assets/logos/external/JAX.svg @@ -0,0 +1 @@ +JAX Light Stroke \ No newline at end of file diff --git a/assets/logos/external/TF.svg b/assets/logos/external/TF.svg new file mode 100644 index 0000000..faf3602 --- /dev/null +++ b/assets/logos/external/TF.svg @@ -0,0 +1 @@ +FullColorPrimary Horizontal \ No newline at end of file diff --git a/assets/logos/external/caffe.png b/assets/logos/external/caffe.png new file mode 100644 index 0000000..38f7a15 Binary files /dev/null and b/assets/logos/external/caffe.png differ diff --git a/assets/logos/external/caffe2.png b/assets/logos/external/caffe2.png new file mode 100644 index 0000000..0bbd914 Binary files /dev/null and b/assets/logos/external/caffe2.png differ diff --git a/assets/logos/external/darknet.png b/assets/logos/external/darknet.png new file mode 100644 index 0000000..f1f6aa3 Binary files /dev/null and b/assets/logos/external/darknet.png differ diff --git a/assets/logos/external/flux.png b/assets/logos/external/flux.png new file mode 100644 index 0000000..c54befa Binary files /dev/null and b/assets/logos/external/flux.png differ diff --git a/assets/logos/external/mlj.svg b/assets/logos/external/mlj.svg new file mode 100644 index 0000000..e06ed96 --- /dev/null +++ b/assets/logos/external/mlj.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/assets/logos/external/paddle.png b/assets/logos/external/paddle.png new file mode 100644 index 0000000..ff99531 Binary files /dev/null and b/assets/logos/external/paddle.png differ diff --git a/assets/logos/external/pytorch.svg b/assets/logos/external/pytorch.svg new file mode 100644 index 0000000..567db87 --- /dev/null +++ b/assets/logos/external/pytorch.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/assets/logos/favicons/UMich_favicon_dark.ico b/assets/logos/favicons/UMich_favicon_dark.ico index be4aff2..dd3d5cb 100644 Binary files a/assets/logos/favicons/UMich_favicon_dark.ico and b/assets/logos/favicons/UMich_favicon_dark.ico differ diff --git a/assets/logos/favicons/UMich_favicon_dark.png 
b/assets/logos/favicons/UMich_favicon_dark.png index a66f57e..86838a2 100644 Binary files a/assets/logos/favicons/UMich_favicon_dark.png and b/assets/logos/favicons/UMich_favicon_dark.png differ diff --git a/assets/logos/favicons/UMich_favicon_light.ico b/assets/logos/favicons/UMich_favicon_light.ico index 55e1583..85c7865 100644 Binary files a/assets/logos/favicons/UMich_favicon_light.ico and b/assets/logos/favicons/UMich_favicon_light.ico differ diff --git a/assets/logos/favicons/UMich_favicon_light.png b/assets/logos/favicons/UMich_favicon_light.png index 1e4d4e5..d23ca01 100644 Binary files a/assets/logos/favicons/UMich_favicon_light.png and b/assets/logos/favicons/UMich_favicon_light.png differ diff --git a/assets/logos/favicons/UMinn_favicon_dark.ico b/assets/logos/favicons/UMinn_favicon_dark.ico deleted file mode 100644 index 28160d3..0000000 Binary files a/assets/logos/favicons/UMinn_favicon_dark.ico and /dev/null differ diff --git a/assets/logos/favicons/UMinn_favicon_dark.png b/assets/logos/favicons/UMinn_favicon_dark.png deleted file mode 100644 index 41d271e..0000000 Binary files a/assets/logos/favicons/UMinn_favicon_dark.png and /dev/null differ diff --git a/assets/logos/favicons/UMinn_favicon_light.ico b/assets/logos/favicons/UMinn_favicon_light.ico deleted file mode 100644 index 0fde04a..0000000 Binary files a/assets/logos/favicons/UMinn_favicon_light.ico and /dev/null differ diff --git a/assets/logos/favicons/UMinn_favicon_light.png b/assets/logos/favicons/UMinn_favicon_light.png deleted file mode 100644 index d8cf6cd..0000000 Binary files a/assets/logos/favicons/UMinn_favicon_light.png and /dev/null differ diff --git a/assets/projects/DeepRobPaperReview.zip b/assets/projects/DeepRobPaperReview.zip new file mode 100644 index 0000000..fffaa1d Binary files /dev/null and b/assets/projects/DeepRobPaperReview.zip differ diff --git a/assets/projects/DeepRobProjectReport.zip b/assets/projects/DeepRobProjectReport.zip new file mode 100644 index 0000000..e8c05a8 Binary files /dev/null and b/assets/projects/DeepRobProjectReport.zip differ diff --git a/assets/projects/P0.zip b/assets/projects/P0.zip new file mode 100644 index 0000000..50b50ef Binary files /dev/null and b/assets/projects/P0.zip differ diff --git a/assets/projects/P1.zip b/assets/projects/P1.zip new file mode 100644 index 0000000..a7cbac4 Binary files /dev/null and b/assets/projects/P1.zip differ diff --git a/assets/projects/P2.zip b/assets/projects/P2.zip new file mode 100644 index 0000000..cde5804 Binary files /dev/null and b/assets/projects/P2.zip differ diff --git a/assets/projects/P3.zip b/assets/projects/P3.zip new file mode 100644 index 0000000..34da5ce Binary files /dev/null and b/assets/projects/P3.zip differ diff --git a/assets/projects/P4.zip b/assets/projects/P4.zip new file mode 100644 index 0000000..fe8418d Binary files /dev/null and b/assets/projects/P4.zip differ diff --git a/assets/projects/reports/BAGS/BAGS_compare.webp b/assets/projects/reports/BAGS/BAGS_compare.webp new file mode 100644 index 0000000..3889d0e Binary files /dev/null and b/assets/projects/reports/BAGS/BAGS_compare.webp differ diff --git a/assets/projects/reports/BAGS/BAGS_output.webp b/assets/projects/reports/BAGS/BAGS_output.webp new file mode 100644 index 0000000..7c0741f Binary files /dev/null and b/assets/projects/reports/BAGS/BAGS_output.webp differ diff --git a/assets/projects/reports/BAGS/Loss_BAGS.webp b/assets/projects/reports/BAGS/Loss_BAGS.webp new file mode 100644 index 0000000..19934c5 Binary files /dev/null and 
b/assets/projects/reports/BAGS/Loss_BAGS.webp differ diff --git a/assets/projects/reports/BAGS/different_rotation_degree.webp b/assets/projects/reports/BAGS/different_rotation_degree.webp new file mode 100644 index 0000000..ee4b519 Binary files /dev/null and b/assets/projects/reports/BAGS/different_rotation_degree.webp differ diff --git a/assets/projects/reports/BAGS/pose_errors.webp b/assets/projects/reports/BAGS/pose_errors.webp new file mode 100644 index 0000000..3ce3ecc Binary files /dev/null and b/assets/projects/reports/BAGS/pose_errors.webp differ diff --git a/assets/projects/reports/BAGS/table_sc.webp b/assets/projects/reports/BAGS/table_sc.webp new file mode 100644 index 0000000..3762bda Binary files /dev/null and b/assets/projects/reports/BAGS/table_sc.webp differ diff --git a/assets/projects/reports/DOPE-Plus/000024_belief_maps.webp b/assets/projects/reports/DOPE-Plus/000024_belief_maps.webp new file mode 100644 index 0000000..0dcfb67 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/000024_belief_maps.webp differ diff --git a/assets/projects/reports/DOPE-Plus/000031.webp b/assets/projects/reports/DOPE-Plus/000031.webp new file mode 100644 index 0000000..5d20289 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/000031.webp differ diff --git a/assets/projects/reports/DOPE-Plus/000210-validate.webp b/assets/projects/reports/DOPE-Plus/000210-validate.webp new file mode 100644 index 0000000..45c7c0f Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/000210-validate.webp differ diff --git a/assets/projects/reports/DOPE-Plus/000210.webp b/assets/projects/reports/DOPE-Plus/000210.webp new file mode 100644 index 0000000..4fa980c Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/000210.webp differ diff --git a/assets/projects/reports/DOPE-Plus/000215-validate.webp b/assets/projects/reports/DOPE-Plus/000215-validate.webp new file mode 100644 index 0000000..472aa5b Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/000215-validate.webp differ diff --git a/assets/projects/reports/DOPE-Plus/000215.webp b/assets/projects/reports/DOPE-Plus/000215.webp new file mode 100644 index 0000000..c9439b3 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/000215.webp differ diff --git a/assets/projects/reports/DOPE-Plus/HDR_example.webp b/assets/projects/reports/DOPE-Plus/HDR_example.webp new file mode 100644 index 0000000..295345d Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/HDR_example.webp differ diff --git a/assets/projects/reports/DOPE-Plus/HOPE-syntheic_GT.webp b/assets/projects/reports/DOPE-Plus/HOPE-syntheic_GT.webp new file mode 100644 index 0000000..125b133 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/HOPE-syntheic_GT.webp differ diff --git a/assets/projects/reports/DOPE-Plus/HOPE_GroundTruth.webp b/assets/projects/reports/DOPE-Plus/HOPE_GroundTruth.webp new file mode 100644 index 0000000..3b90150 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/HOPE_GroundTruth.webp differ diff --git a/assets/projects/reports/DOPE-Plus/Loss_block.webp b/assets/projects/reports/DOPE-Plus/Loss_block.webp new file mode 100644 index 0000000..360b48c Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/Loss_block.webp differ diff --git a/assets/projects/reports/DOPE-Plus/Loss_cookies.webp b/assets/projects/reports/DOPE-Plus/Loss_cookies.webp new file mode 100644 index 0000000..3cbed30 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/Loss_cookies.webp differ diff --git 
a/assets/projects/reports/DOPE-Plus/ROB599_Final_report.pdf b/assets/projects/reports/DOPE-Plus/ROB599_Final_report.pdf new file mode 100644 index 0000000..49c7b80 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/ROB599_Final_report.pdf differ diff --git a/assets/projects/reports/DOPE-Plus/ViT-DOPE_structure.webp b/assets/projects/reports/DOPE-Plus/ViT-DOPE_structure.webp new file mode 100644 index 0000000..020d020 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/ViT-DOPE_structure.webp differ diff --git a/assets/projects/reports/DOPE-Plus/accuracy_block.webp b/assets/projects/reports/DOPE-Plus/accuracy_block.webp new file mode 100644 index 0000000..ca9ef88 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/accuracy_block.webp differ diff --git a/assets/projects/reports/DOPE-Plus/accuracy_cookies.webp b/assets/projects/reports/DOPE-Plus/accuracy_cookies.webp new file mode 100644 index 0000000..4244053 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/accuracy_cookies.webp differ diff --git a/assets/projects/reports/DOPE-Plus/block_blender.webp b/assets/projects/reports/DOPE-Plus/block_blender.webp new file mode 100644 index 0000000..35398fd Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/block_blender.webp differ diff --git a/assets/projects/reports/DOPE-Plus/inference_predict.webp b/assets/projects/reports/DOPE-Plus/inference_predict.webp new file mode 100644 index 0000000..c623890 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/inference_predict.webp differ diff --git a/assets/projects/reports/DOPE-Plus/mAP_block.webp b/assets/projects/reports/DOPE-Plus/mAP_block.webp new file mode 100644 index 0000000..2e6f3b9 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/mAP_block.webp differ diff --git a/assets/projects/reports/DOPE-Plus/mAP_cookies.webp b/assets/projects/reports/DOPE-Plus/mAP_cookies.webp new file mode 100644 index 0000000..eb7c903 Binary files /dev/null and b/assets/projects/reports/DOPE-Plus/mAP_cookies.webp differ diff --git a/assets/projects/reports/FETCH-GRASP/Anygrasp.webp b/assets/projects/reports/FETCH-GRASP/Anygrasp.webp new file mode 100644 index 0000000..f4c744d Binary files /dev/null and b/assets/projects/reports/FETCH-GRASP/Anygrasp.webp differ diff --git a/assets/projects/reports/FETCH-GRASP/DeepRob_gif.gif b/assets/projects/reports/FETCH-GRASP/DeepRob_gif.gif new file mode 100644 index 0000000..183e961 Binary files /dev/null and b/assets/projects/reports/FETCH-GRASP/DeepRob_gif.gif differ diff --git a/assets/projects/reports/FETCH-GRASP/deform1.webp b/assets/projects/reports/FETCH-GRASP/deform1.webp new file mode 100644 index 0000000..795c0a1 Binary files /dev/null and b/assets/projects/reports/FETCH-GRASP/deform1.webp differ diff --git a/assets/projects/reports/FETCH-GRASP/deform2.webp b/assets/projects/reports/FETCH-GRASP/deform2.webp new file mode 100644 index 0000000..ffefd92 Binary files /dev/null and b/assets/projects/reports/FETCH-GRASP/deform2.webp differ diff --git a/assets/projects/reports/FETCH-GRASP/rigid_grasp.webp b/assets/projects/reports/FETCH-GRASP/rigid_grasp.webp new file mode 100644 index 0000000..62102e9 Binary files /dev/null and b/assets/projects/reports/FETCH-GRASP/rigid_grasp.webp differ diff --git a/assets/projects/reports/FETCH-GRASP/rigid_grasp_2.png b/assets/projects/reports/FETCH-GRASP/rigid_grasp_2.png new file mode 100644 index 0000000..51c91b3 Binary files /dev/null and b/assets/projects/reports/FETCH-GRASP/rigid_grasp_2.png differ diff 
--git a/assets/projects/reports/FETCH-GRASP/transcg.webp b/assets/projects/reports/FETCH-GRASP/transcg.webp new file mode 100644 index 0000000..a35e2ca Binary files /dev/null and b/assets/projects/reports/FETCH-GRASP/transcg.webp differ diff --git a/assets/projects/reports/FETCH-GRASP/transcg1.webp b/assets/projects/reports/FETCH-GRASP/transcg1.webp new file mode 100644 index 0000000..099dde5 Binary files /dev/null and b/assets/projects/reports/FETCH-GRASP/transcg1.webp differ diff --git a/assets/projects/reports/FETCH-GRASP/transcg2.webp b/assets/projects/reports/FETCH-GRASP/transcg2.webp new file mode 100644 index 0000000..d5fb27f Binary files /dev/null and b/assets/projects/reports/FETCH-GRASP/transcg2.webp differ diff --git a/assets/projects/reports/acceleration/DeepRobProjectReport.pdf b/assets/projects/reports/acceleration/DeepRobProjectReport.pdf new file mode 100644 index 0000000..a35b38d Binary files /dev/null and b/assets/projects/reports/acceleration/DeepRobProjectReport.pdf differ diff --git a/assets/projects/reports/acceleration/background.png b/assets/projects/reports/acceleration/background.png new file mode 100644 index 0000000..e0469ef Binary files /dev/null and b/assets/projects/reports/acceleration/background.png differ diff --git a/assets/projects/reports/acceleration/f1.png b/assets/projects/reports/acceleration/f1.png new file mode 100644 index 0000000..02d8247 Binary files /dev/null and b/assets/projects/reports/acceleration/f1.png differ diff --git a/assets/projects/reports/acceleration/f2.png b/assets/projects/reports/acceleration/f2.png new file mode 100644 index 0000000..a12ed8a Binary files /dev/null and b/assets/projects/reports/acceleration/f2.png differ diff --git a/assets/projects/reports/acceleration/f3.png b/assets/projects/reports/acceleration/f3.png new file mode 100644 index 0000000..31eb010 Binary files /dev/null and b/assets/projects/reports/acceleration/f3.png differ diff --git a/assets/projects/reports/acceleration/image1.png b/assets/projects/reports/acceleration/image1.png new file mode 100644 index 0000000..e813514 Binary files /dev/null and b/assets/projects/reports/acceleration/image1.png differ diff --git a/assets/projects/reports/acceleration/image2.jpg b/assets/projects/reports/acceleration/image2.jpg new file mode 100644 index 0000000..c7a446c Binary files /dev/null and b/assets/projects/reports/acceleration/image2.jpg differ diff --git a/assets/projects/reports/acceleration/result.png b/assets/projects/reports/acceleration/result.png new file mode 100644 index 0000000..721d219 Binary files /dev/null and b/assets/projects/reports/acceleration/result.png differ diff --git a/assets/projects/reports/acceleration/result1.jpg b/assets/projects/reports/acceleration/result1.jpg new file mode 100644 index 0000000..0fff591 Binary files /dev/null and b/assets/projects/reports/acceleration/result1.jpg differ diff --git a/assets/projects/reports/aura/Deep_Rob_Final_Report.pdf b/assets/projects/reports/aura/Deep_Rob_Final_Report.pdf new file mode 100644 index 0000000..c7348e0 Binary files /dev/null and b/assets/projects/reports/aura/Deep_Rob_Final_Report.pdf differ diff --git a/assets/projects/reports/aura/Deep_Rob_Final_Report.pdf:Zone.Identifier b/assets/projects/reports/aura/Deep_Rob_Final_Report.pdf:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/assets/projects/reports/aura/artist.jpg b/assets/projects/reports/aura/artist.jpg new file mode 100644 index 0000000..58eb574 Binary files /dev/null and 
b/assets/projects/reports/aura/artist.jpg differ diff --git a/assets/projects/reports/aura/artist.jpg:Zone.Identifier b/assets/projects/reports/aura/artist.jpg:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/assets/projects/reports/aura/deeprob_poster.drawio.png b/assets/projects/reports/aura/deeprob_poster.drawio.png new file mode 100644 index 0000000..ae8c516 Binary files /dev/null and b/assets/projects/reports/aura/deeprob_poster.drawio.png differ diff --git a/assets/projects/reports/aura/deeprob_poster.drawio.png:Zone.Identifier b/assets/projects/reports/aura/deeprob_poster.drawio.png:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/assets/projects/reports/aura/obsgen.jpg b/assets/projects/reports/aura/obsgen.jpg new file mode 100644 index 0000000..7b43ebe Binary files /dev/null and b/assets/projects/reports/aura/obsgen.jpg differ diff --git a/assets/projects/reports/aura/obsgen.jpg:Zone.Identifier b/assets/projects/reports/aura/obsgen.jpg:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/assets/projects/reports/aura/openpose.png b/assets/projects/reports/aura/openpose.png new file mode 100644 index 0000000..73ae221 Binary files /dev/null and b/assets/projects/reports/aura/openpose.png differ diff --git a/assets/projects/reports/aura/openpose.png:Zone.Identifier b/assets/projects/reports/aura/openpose.png:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/assets/projects/reports/aura/openposemap.jpg b/assets/projects/reports/aura/openposemap.jpg new file mode 100644 index 0000000..06adbb7 Binary files /dev/null and b/assets/projects/reports/aura/openposemap.jpg differ diff --git a/assets/projects/reports/aura/openposemap.jpg:Zone.Identifier b/assets/projects/reports/aura/openposemap.jpg:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/assets/projects/reports/aura/predpose.jpg b/assets/projects/reports/aura/predpose.jpg new file mode 100644 index 0000000..cf82e25 Binary files /dev/null and b/assets/projects/reports/aura/predpose.jpg differ diff --git a/assets/projects/reports/aura/predpose.jpg:Zone.Identifier b/assets/projects/reports/aura/predpose.jpg:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/assets/projects/reports/aura/simlpe.png b/assets/projects/reports/aura/simlpe.png new file mode 100644 index 0000000..f7a9291 Binary files /dev/null and b/assets/projects/reports/aura/simlpe.png differ diff --git a/assets/projects/reports/aura/simlpe.png:Zone.Identifier b/assets/projects/reports/aura/simlpe.png:Zone.Identifier new file mode 100644 index 0000000..e69de29 diff --git a/assets/projects/reports/deweathered/DeepRob_Project_Group13.pdf b/assets/projects/reports/deweathered/DeepRob_Project_Group13.pdf new file mode 100644 index 0000000..134d56a Binary files /dev/null and b/assets/projects/reports/deweathered/DeepRob_Project_Group13.pdf differ diff --git a/assets/projects/reports/deweathered/head_img.png b/assets/projects/reports/deweathered/head_img.png new file mode 100644 index 0000000..6a52936 Binary files /dev/null and b/assets/projects/reports/deweathered/head_img.png differ diff --git a/assets/projects/reports/diffdet/DeepRobProjectReport.pdf b/assets/projects/reports/diffdet/DeepRobProjectReport.pdf new file mode 100644 index 0000000..a35b38d Binary files /dev/null and b/assets/projects/reports/diffdet/DeepRobProjectReport.pdf differ diff --git a/assets/projects/reports/diffdet/background.webp 
b/assets/projects/reports/diffdet/background.webp new file mode 100644 index 0000000..60d8bc7 Binary files /dev/null and b/assets/projects/reports/diffdet/background.webp differ diff --git a/assets/projects/reports/diffdet/f1.webp b/assets/projects/reports/diffdet/f1.webp new file mode 100644 index 0000000..f5bca9f Binary files /dev/null and b/assets/projects/reports/diffdet/f1.webp differ diff --git a/assets/projects/reports/diffdet/f2.webp b/assets/projects/reports/diffdet/f2.webp new file mode 100644 index 0000000..dfb6b76 Binary files /dev/null and b/assets/projects/reports/diffdet/f2.webp differ diff --git a/assets/projects/reports/diffdet/f3.webp b/assets/projects/reports/diffdet/f3.webp new file mode 100644 index 0000000..ec32ee3 Binary files /dev/null and b/assets/projects/reports/diffdet/f3.webp differ diff --git a/assets/projects/reports/diffdet/image1.webp b/assets/projects/reports/diffdet/image1.webp new file mode 100644 index 0000000..f288abb Binary files /dev/null and b/assets/projects/reports/diffdet/image1.webp differ diff --git a/assets/projects/reports/diffdet/image2.webp b/assets/projects/reports/diffdet/image2.webp new file mode 100644 index 0000000..5e733be Binary files /dev/null and b/assets/projects/reports/diffdet/image2.webp differ diff --git a/assets/projects/reports/diffdet/result.webp b/assets/projects/reports/diffdet/result.webp new file mode 100644 index 0000000..c2ea881 Binary files /dev/null and b/assets/projects/reports/diffdet/result.webp differ diff --git a/assets/projects/reports/diffdet/result1.webp b/assets/projects/reports/diffdet/result1.webp new file mode 100644 index 0000000..9b940a9 Binary files /dev/null and b/assets/projects/reports/diffdet/result1.webp differ diff --git a/assets/w23/projects/reports/deeprob.gif b/assets/projects/reports/example/deeprob.gif similarity index 100% rename from assets/w23/projects/reports/deeprob.gif rename to assets/projects/reports/example/deeprob.gif diff --git a/assets/projects/reports/how-to/compare_button.webp b/assets/projects/reports/how-to/compare_button.webp new file mode 100644 index 0000000..eee7ab0 Binary files /dev/null and b/assets/projects/reports/how-to/compare_button.webp differ diff --git a/assets/projects/reports/how-to/development_environment.webp b/assets/projects/reports/how-to/development_environment.webp new file mode 100644 index 0000000..f8dd10f Binary files /dev/null and b/assets/projects/reports/how-to/development_environment.webp differ diff --git a/assets/projects/reports/how-to/fork_button.webp b/assets/projects/reports/how-to/fork_button.webp new file mode 100644 index 0000000..9a6c38c Binary files /dev/null and b/assets/projects/reports/how-to/fork_button.webp differ diff --git a/assets/projects/reports/how-to/fork_options.webp b/assets/projects/reports/how-to/fork_options.webp new file mode 100644 index 0000000..0e6b824 Binary files /dev/null and b/assets/projects/reports/how-to/fork_options.webp differ diff --git a/assets/projects/reports/how-to/pull_request_button.webp b/assets/projects/reports/how-to/pull_request_button.webp new file mode 100644 index 0000000..6dbd0e5 Binary files /dev/null and b/assets/projects/reports/how-to/pull_request_button.webp differ diff --git a/assets/projects/reports/lane/FPN.png b/assets/projects/reports/lane/FPN.png new file mode 100644 index 0000000..d45e541 Binary files /dev/null and b/assets/projects/reports/lane/FPN.png differ diff --git a/assets/projects/reports/leaf/Rob498FinalReport.pdf b/assets/projects/reports/leaf/Rob498FinalReport.pdf new 
file mode 100644 index 0000000..64896d3 Binary files /dev/null and b/assets/projects/reports/leaf/Rob498FinalReport.pdf differ diff --git a/assets/projects/reports/leaf/StudentTeacher.jpg b/assets/projects/reports/leaf/StudentTeacher.jpg new file mode 100644 index 0000000..8c419f7 Binary files /dev/null and b/assets/projects/reports/leaf/StudentTeacher.jpg differ diff --git a/assets/projects/reports/leaf/StudentTeacher_Train_Loss.png b/assets/projects/reports/leaf/StudentTeacher_Train_Loss.png new file mode 100644 index 0000000..6151b9a Binary files /dev/null and b/assets/projects/reports/leaf/StudentTeacher_Train_Loss.png differ diff --git a/assets/projects/reports/leaf/StudentTeacher_Val_Acc.png b/assets/projects/reports/leaf/StudentTeacher_Val_Acc.png new file mode 100644 index 0000000..1c95a80 Binary files /dev/null and b/assets/projects/reports/leaf/StudentTeacher_Val_Acc.png differ diff --git a/assets/slides/deeprob_discussion_00.pdf b/assets/slides/deeprob_discussion_00.pdf new file mode 100644 index 0000000..75a89ff Binary files /dev/null and b/assets/slides/deeprob_discussion_00.pdf differ diff --git a/assets/slides/deeprob_discussion_01.pdf b/assets/slides/deeprob_discussion_01.pdf new file mode 100644 index 0000000..fced095 Binary files /dev/null and b/assets/slides/deeprob_discussion_01.pdf differ diff --git a/assets/slides/deeprob_discussion_02.pdf b/assets/slides/deeprob_discussion_02.pdf new file mode 100644 index 0000000..2639cbb Binary files /dev/null and b/assets/slides/deeprob_discussion_02.pdf differ diff --git a/assets/slides/deeprob_discussion_03.pdf b/assets/slides/deeprob_discussion_03.pdf new file mode 100644 index 0000000..47c425e Binary files /dev/null and b/assets/slides/deeprob_discussion_03.pdf differ diff --git a/assets/slides/deeprob_discussion_06.pdf b/assets/slides/deeprob_discussion_06.pdf new file mode 100644 index 0000000..5b83880 Binary files /dev/null and b/assets/slides/deeprob_discussion_06.pdf differ diff --git a/assets/slides/deeprob_discussion_07.pdf b/assets/slides/deeprob_discussion_07.pdf new file mode 100644 index 0000000..77139ae Binary files /dev/null and b/assets/slides/deeprob_discussion_07.pdf differ diff --git a/assets/slides/deeprob_lecture_01_introduction.pdf b/assets/slides/deeprob_lecture_01_introduction.pdf new file mode 100644 index 0000000..02c994c Binary files /dev/null and b/assets/slides/deeprob_lecture_01_introduction.pdf differ diff --git a/assets/slides/deeprob_lecture_02_image_classification.pdf b/assets/slides/deeprob_lecture_02_image_classification.pdf new file mode 100644 index 0000000..4f898a4 Binary files /dev/null and b/assets/slides/deeprob_lecture_02_image_classification.pdf differ diff --git a/assets/slides/deeprob_lecture_03_linear_classifiers.pdf b/assets/slides/deeprob_lecture_03_linear_classifiers.pdf new file mode 100644 index 0000000..d5f5244 Binary files /dev/null and b/assets/slides/deeprob_lecture_03_linear_classifiers.pdf differ diff --git a/assets/slides/deeprob_lecture_04_optimization.pdf b/assets/slides/deeprob_lecture_04_optimization.pdf new file mode 100644 index 0000000..b3dc41f Binary files /dev/null and b/assets/slides/deeprob_lecture_04_optimization.pdf differ diff --git a/assets/slides/deeprob_lecture_05_neural_networks.pdf b/assets/slides/deeprob_lecture_05_neural_networks.pdf new file mode 100644 index 0000000..846799c Binary files /dev/null and b/assets/slides/deeprob_lecture_05_neural_networks.pdf differ diff --git a/assets/slides/deeprob_lecture_06_backpropagation.pdf 
b/assets/slides/deeprob_lecture_06_backpropagation.pdf new file mode 100644 index 0000000..d2e935f Binary files /dev/null and b/assets/slides/deeprob_lecture_06_backpropagation.pdf differ diff --git a/assets/slides/deeprob_lecture_07_convolutional_networks.pdf b/assets/slides/deeprob_lecture_07_convolutional_networks.pdf new file mode 100644 index 0000000..9826c55 Binary files /dev/null and b/assets/slides/deeprob_lecture_07_convolutional_networks.pdf differ diff --git a/assets/slides/deeprob_lecture_08_cnn_architectures.pdf b/assets/slides/deeprob_lecture_08_cnn_architectures.pdf new file mode 100644 index 0000000..037e335 Binary files /dev/null and b/assets/slides/deeprob_lecture_08_cnn_architectures.pdf differ diff --git a/assets/slides/deeprob_lecture_09_training_neural_networks.pdf b/assets/slides/deeprob_lecture_09_training_neural_networks.pdf new file mode 100644 index 0000000..9387746 Binary files /dev/null and b/assets/slides/deeprob_lecture_09_training_neural_networks.pdf differ diff --git a/assets/slides/deeprob_lecture_10_training_neural_networks.pdf b/assets/slides/deeprob_lecture_10_training_neural_networks.pdf new file mode 100644 index 0000000..94b8488 Binary files /dev/null and b/assets/slides/deeprob_lecture_10_training_neural_networks.pdf differ diff --git a/assets/slides/deeprob_lecture_11_object_detection.pdf b/assets/slides/deeprob_lecture_11_object_detection.pdf new file mode 100644 index 0000000..c64ce6d Binary files /dev/null and b/assets/slides/deeprob_lecture_11_object_detection.pdf differ diff --git a/assets/slides/deeprob_lecture_12_object_detection.pdf b/assets/slides/deeprob_lecture_12_object_detection.pdf new file mode 100644 index 0000000..7af147e Binary files /dev/null and b/assets/slides/deeprob_lecture_12_object_detection.pdf differ diff --git a/assets/slides/deeprob_lecture_13_pose_estimation.pdf b/assets/slides/deeprob_lecture_13_pose_estimation.pdf new file mode 100644 index 0000000..ccc6030 Binary files /dev/null and b/assets/slides/deeprob_lecture_13_pose_estimation.pdf differ diff --git a/assets/slides/deeprob_lecture_14_final_project_showcase.pdf b/assets/slides/deeprob_lecture_14_final_project_showcase.pdf new file mode 100644 index 0000000..7f91819 Binary files /dev/null and b/assets/slides/deeprob_lecture_14_final_project_showcase.pdf differ diff --git a/assets/slides/deeprob_lecture_15_deep_learning_software.pdf b/assets/slides/deeprob_lecture_15_deep_learning_software.pdf new file mode 100644 index 0000000..cb61c16 Binary files /dev/null and b/assets/slides/deeprob_lecture_15_deep_learning_software.pdf differ diff --git a/assets/slides/deeprob_lecture_16_sequences.pdf b/assets/slides/deeprob_lecture_16_sequences.pdf new file mode 100644 index 0000000..114cb2b Binary files /dev/null and b/assets/slides/deeprob_lecture_16_sequences.pdf differ diff --git a/assets/slides/deeprob_lecture_17_attention.pdf b/assets/slides/deeprob_lecture_17_attention.pdf new file mode 100644 index 0000000..f04b9b2 Binary files /dev/null and b/assets/slides/deeprob_lecture_17_attention.pdf differ diff --git a/assets/slides/deeprob_lecture_18_vision_transformers.pdf b/assets/slides/deeprob_lecture_18_vision_transformers.pdf new file mode 100644 index 0000000..70f5372 Binary files /dev/null and b/assets/slides/deeprob_lecture_18_vision_transformers.pdf differ diff --git a/assets/slides/deeprob_lecture_19_transformers.pdf b/assets/slides/deeprob_lecture_19_transformers.pdf new file mode 100644 index 0000000..0ab7622 Binary files /dev/null and 
b/assets/slides/deeprob_lecture_19_transformers.pdf differ diff --git a/assets/slides/deeprob_lecture_20_3d_vision.pdf b/assets/slides/deeprob_lecture_20_3d_vision.pdf new file mode 100644 index 0000000..ae4a715 Binary files /dev/null and b/assets/slides/deeprob_lecture_20_3d_vision.pdf differ diff --git a/assets/slides/deeprob_lecture_21_nerf_3dgs.pdf b/assets/slides/deeprob_lecture_21_nerf_3dgs.pdf new file mode 100644 index 0000000..977c119 Binary files /dev/null and b/assets/slides/deeprob_lecture_21_nerf_3dgs.pdf differ diff --git a/assets/slides/deeprob_lecture_22_visualizing.pdf b/assets/slides/deeprob_lecture_22_visualizing.pdf new file mode 100644 index 0000000..0e467c9 Binary files /dev/null and b/assets/slides/deeprob_lecture_22_visualizing.pdf differ diff --git a/assets/slides/templates/DeepRob.kth b/assets/slides/templates/DeepRob.kth old mode 100755 new mode 100644 index 842e174..b3b4528 Binary files a/assets/slides/templates/DeepRob.kth and b/assets/slides/templates/DeepRob.kth differ diff --git a/assets/slides/templates/DeepRob_Paper_Presentation.key b/assets/slides/templates/DeepRob_Paper_Presentation.key old mode 100755 new mode 100644 index fa70b01..4cb1392 Binary files a/assets/slides/templates/DeepRob_Paper_Presentation.key and b/assets/slides/templates/DeepRob_Paper_Presentation.key differ diff --git a/assets/slides/templates/DeepRob_Paper_Presentation.pptx b/assets/slides/templates/DeepRob_Paper_Presentation.pptx index 3da1da0..bfe0ce9 100644 Binary files a/assets/slides/templates/DeepRob_Paper_Presentation.pptx and b/assets/slides/templates/DeepRob_Paper_Presentation.pptx differ diff --git a/assets/videos/banner.webm b/assets/videos/banner.webm new file mode 100644 index 0000000..758ba55 Binary files /dev/null and b/assets/videos/banner.webm differ diff --git a/assets/w23/projects/reports/da-trash/arch.webp b/assets/w23/projects/reports/da-trash/arch.webp deleted file mode 100644 index 1db85a5..0000000 Binary files a/assets/w23/projects/reports/da-trash/arch.webp and /dev/null differ diff --git a/assets/w23/projects/reports/da-trash/da-trash.pdf b/assets/w23/projects/reports/da-trash/da-trash.pdf deleted file mode 100644 index d6cf3d7..0000000 Binary files a/assets/w23/projects/reports/da-trash/da-trash.pdf and /dev/null differ diff --git a/assets/w23/projects/reports/da-trash/qualitative-1.webp b/assets/w23/projects/reports/da-trash/qualitative-1.webp deleted file mode 100644 index 42f09e5..0000000 Binary files a/assets/w23/projects/reports/da-trash/qualitative-1.webp and /dev/null differ diff --git a/assets/w23/projects/reports/da-trash/qualitative-2.webp b/assets/w23/projects/reports/da-trash/qualitative-2.webp deleted file mode 100644 index 8e7198b..0000000 Binary files a/assets/w23/projects/reports/da-trash/qualitative-2.webp and /dev/null differ diff --git a/assets/w23/projects/reports/da-trash/table.ii.webp b/assets/w23/projects/reports/da-trash/table.ii.webp deleted file mode 100644 index 2d0b170..0000000 Binary files a/assets/w23/projects/reports/da-trash/table.ii.webp and /dev/null differ diff --git a/calendar.md b/calendar.md index c5048c8..fdaaa0b 100644 --- a/calendar.md +++ b/calendar.md @@ -7,6 +7,7 @@ nav_order: 3 # Calendar + {% for module in site.modules %} {{ module }} {% endfor %} diff --git a/datasets/index.md b/datasets/index.md index 89af428..a2d02c7 100644 --- a/datasets/index.md +++ b/datasets/index.md @@ -2,7 +2,7 @@ layout: page title: Datasets description: Overview of the robot perception datasets used throughout the DeepRob course. 
-nav_order: 5 +nav_order: 7 has_children: true has_toc: false --- @@ -14,19 +14,19 @@ At the core of Deep Learning is a set of computational techniques to identify an The PROPS datasets consist of downsampled versions of data collected from the [ProgressLabeller annotation tool (Chen et al., 2022)](https://arxiv.org/abs/2203.00283){: target="_blank" rel="noopener noreferrer"}. This dataset focuses on table-top scenes that are inspired by the environments a domestic service robot would be expected to encounter. Objects in these scenes are from the [YCB Object and Model Set (Calli et al., 2015)](https://ieeexplore.ieee.org/abstract/document/7251504){: target="_blank" rel="noopener noreferrer"}. -[Course projects](/projects/) in DeepRob are built using the PROPS datasets. +[Course projects]({{ site.baseurl }}/projects/) in DeepRob are built using the PROPS datasets. -## [PROPS Classification](/datasets/props-classification/) +## [PROPS Classification]({{ site.baseurl }}/datasets/props-classification/) This portion of the dataset is catered for image classification tasks. The format of this portion is based on that of the [CIFAR-10 dataset](https://www.cs.toronto.edu/~kriz/cifar.html){: target="_blank" rel="noopener noreferrer"}. The PROPS Classification dataset contains 10 object categories with 50K training images and 10K testing images. Each image in the dataset is a 32x32 RGB color image. All images in the test set are taken from scenes not represented in the training set. -## [PROPS Detection](/datasets/props-detection/) +## [PROPS Detection]({{ site.baseurl }}/datasets/props-detection/) This portion of the dataset is catered for object detection tasks. The PROPS Detection dataset contains 10 object categories with 2.5K training images and 2.5K validation images. Each image in the dataset is a 640x480 RGB color image. All images in the validation set are taken from scenes not represented in the training set. -## [PROPS Pose Estimation](/datasets/props-pose/) +## [PROPS Pose Estimation]({{ site.baseurl }}/datasets/props-pose/) This portion of the dataset is catered for 6 degrees-of-freedom rigid body object pose estimation. The PROPS Pose dataset contains 10 object categories with 500 training images and 500 validation images. Each image in the dataset is a 640x480 RGB color image. Aligned depth images and segmentation masks are also included in the dataset. diff --git a/datasets/props-classification.md b/datasets/props-classification.md index 0905979..6f7ac26 100644 --- a/datasets/props-classification.md +++ b/datasets/props-classification.md @@ -12,12 +12,12 @@ This portion of the dataset is catered for image classification tasks. The forma ## Download -The dataset is available for download on [Google Drive ![](/assets/logos/logo_drive_2020q4_color_2x_web_64dp.png){: .text-logo }](https://drive.google.com/file/d/1C8_JFsnPVm392C-S1rH0y4HFfNkdMlXi/view?usp=share_link){: target="_blank" rel="noopener noreferrer"}. +The dataset is available for download on [Google Drive ![]({{ site.baseurl }}/assets/logos/logo_drive_2020q4_color_2x_web_64dp.png){: .text-logo }](https://drive.google.com/file/d/1C8_JFsnPVm392C-S1rH0y4HFfNkdMlXi/view?usp=share_link){: target="_blank" rel="noopener noreferrer"}. -We provide the [`PROPSClassificationDataset`](/assets/projects/PROPSClassificationDataset.py), a PyTorch dataset class, to support development with and use of the PROPS Classification dataset. 
+We provide the [`PROPSClassificationDataset`]({{ site.baseurl }}/assets/projects/PROPSClassificationDataset.py), a PyTorch dataset class, to support development with and use of the PROPS Classification dataset. ## Examples Sample images of each category in the PROPS Classification dataset are included below: -![Sample images from PROPS classification dataset](/assets/images/props_classification.webp){: .data-img } +![Sample images from PROPS classification dataset]({{ site.baseurl }}/assets/images/props_classification.webp){: .data-img } diff --git a/datasets/props-detection.md b/datasets/props-detection.md index 55ebbf9..669d7e5 100644 --- a/datasets/props-detection.md +++ b/datasets/props-detection.md @@ -12,12 +12,12 @@ This portion of the dataset is catered for object detection tasks. The PROPS Det ## Download -The dataset is available for download on [Google Drive ![](/assets/logos/logo_drive_2020q4_color_2x_web_64dp.png){: .text-logo }](https://drive.google.com/file/d/1vG7_O-1JcYAgixdnV_n0QuFCt2R0050j/view?usp=share_link){: target="_blank" rel="noopener noreferrer"}. +The dataset is available for download on [Google Drive ![]({{ site.baseurl }}/assets/logos/logo_drive_2020q4_color_2x_web_64dp.png){: .text-logo }](https://drive.google.com/file/d/1vG7_O-1JcYAgixdnV_n0QuFCt2R0050j/view?usp=share_link){: target="_blank" rel="noopener noreferrer"}. -We provide the [`PROPSDetectionDataset`](/assets/projects/PROPSDetectionDataset.py), a PyTorch dataset class, to support development with and use of the PROPS Detection dataset. +We provide the [`PROPSDetectionDataset`]({{ site.baseurl }}/assets/projects/PROPSDetectionDataset.py), a PyTorch dataset class, to support development with and use of the PROPS Detection dataset. ## Examples Sample images of each category in the PROPS Detection dataset are included below: -![Sample animation showing PROPS detection samples](/assets/images/props_detection.gif){: .data-img } +![Sample animation showing PROPS detection samples]({{ site.baseurl }}/assets/images/props_detection.gif){: .data-img } diff --git a/datasets/props-pose.md b/datasets/props-pose.md index d9db6ab..fe63147 100644 --- a/datasets/props-pose.md +++ b/datasets/props-pose.md @@ -12,12 +12,12 @@ This portion of the dataset is catered for 6 degrees-of-freedom rigid body objec ## Download -The dataset is available for download on [Google Drive ![](/assets/logos/logo_drive_2020q4_color_2x_web_64dp.png){: .text-logo }](https://drive.google.com/file/d/15rhwXhzHGKtBcxJAYMWJG7gN7BLLhyAq/view?usp=share_link){: target="_blank" rel="noopener noreferrer"}. +The dataset is available for download on [Google Drive ![]({{ site.baseurl }}/assets/logos/logo_drive_2020q4_color_2x_web_64dp.png){: .text-logo }](https://drive.google.com/file/d/15rhwXhzHGKtBcxJAYMWJG7gN7BLLhyAq/view?usp=share_link){: target="_blank" rel="noopener noreferrer"}. -We provide the [`PROPSPoseDataset`](/assets/projects/PROPSPoseDataset.py), a PyTorch dataset class, to support development with and use of the PROPS Pose dataset. +We provide the [`PROPSPoseDataset`]({{ site.baseurl }}/assets/projects/PROPSPoseDataset.py), a PyTorch dataset class, to support development with and use of the PROPS Pose dataset. 
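To make the intended workflow concrete, here is a minimal sketch of how one of these provided dataset classes plugs into a standard PyTorch pipeline. Only the class and file names are taken from this page; the constructor arguments and the structure of each returned sample are assumptions, so check the docstring of the downloaded `*.py` file before relying on them.

```python
# Minimal sketch: wrapping a provided PROPS dataset class in a DataLoader.
# The constructor arguments below (dataset root, split name) are hypothetical.
from torch.utils.data import DataLoader

from PROPSPoseDataset import PROPSPoseDataset  # class provided above

dataset = PROPSPoseDataset("PROPS-Pose-Dataset", split="train")  # hypothetical arguments
loader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=2)

print(len(dataset))     # number of training samples
sample = dataset[0]     # one sample: expect an RGB image plus depth, masks, and pose labels
print(type(sample))

for batch in loader:    # batches can feed a training loop directly
    print(type(batch))
    break
```

The same pattern applies to `PROPSClassificationDataset` and `PROPSDetectionDataset`.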
## Examples Sample images and labels for each category in the PROPS Pose dataset are included below: -![Sample animation showing PROPS pose samples](/assets/images/props_pose.gif){: .data-img } +![Sample animation showing PROPS pose samples]({{ site.baseurl }}/assets/images/props_pose.gif){: .data-img } diff --git a/favicon.ico b/favicon.ico index 55e1583..85c7865 100644 Binary files a/favicon.ico and b/favicon.ico differ diff --git a/home.md b/home.md index 9790b3d..5dd205f 100644 --- a/home.md +++ b/home.md @@ -1,36 +1,118 @@ --- layout: page title: Home -description: A course covering the necessary background of neural-network-based deep learning for robot perception – building on advancements in computer vision that enable robots to physically manipulate objects. ROB 498-002 and ROB 599-009 at the University of Michigan. +description: A course covering the necessary background of neural-network-based deep learning for robot perception – building on advancements in computer vision that enable robots to physically manipulate objects. ROB 498-004 and ROB 599-004 at the University of Michigan. nav_order: 1 permalink: / --- -# DeepRob: Deep Learning for Robot Perception -ROB 498-011 & 599-011, Winter 2024 at The University of Michigan -{: .fs-6 .fw-300 } + + -{: .highlight } -This website describes a course still under development. All schedules, syllabus, and plans described on this site are tentative. -This course covers the necessary background of neural-network-based deep learning for robot perception – building on advancements in computer vision that enable robots to physically manipulate objects. During the first part of this course, students will learn to implement, train and debug their own neural networks. During the second part of this course, students will explore recent emerging topics in deep learning for robot perception and manipulation. This exploration will include analysis of research publications in the area, building up to reproducing one of these publications for implementation as a final course project. +This course covers the necessary background of neural-network-based deep learning for robot perception – building on advancements in computer vision that enable robots to physically manipulate objects. During the first part of this course, students will learn to implement, train and debug their own neural networks. During the second part of this course, students will explore recent emerging topics in deep learning for robot perception and manipulation. This exploration will include analysis of research publications in the area, building up to reproducing and implementing state-of-the-art deep learning approaches as a final course project. This course is being offered at [the University of Michigan](https://umich.edu/){: target="_blank" rel="noopener noreferrer"} ([Xiaoxiao Du](https://xiaoxiaodu.net){: target="_blank" rel="noopener noreferrer"}, [Anthony Opipari](https://topipari.com){: target="_blank" rel="noopener noreferrer"}, [Chad Jenkins](https://ocj.name/){: target="_blank" rel="noopener noreferrer"}). 
This course builds on and is indebted to these existing courses (as a β€œstar” and a "fork" in the open source sense): -- [University of Michigan - ROB 498-002 / 599-009: Deep Learning for Robot Perception](/w23/){: target="_blank" rel="noopener noreferrer"} instructed by [Anthony Opipari](https://web.eecs.umich.edu/~justincj/){: target="_blank" rel="noopener noreferrer"}, [Chad Jenkins](https://ocj.name/){: target="_blank" rel="noopener noreferrer"}, and [Karthik Desingh](https://karthikdesingh.com/){: target="_blank" rel="noopener noreferrer"} +- [University of Michigan - ROB 498-011 / 599-011: Deep Learning for Robot Perception](/w24/){: target="_blank" rel="noopener noreferrer"} instructed by [Xiaoxiao Du](https://xiaoxiaodu.net){: target="_blank" rel="noopener noreferrer"}, [Anthony Opipari](https://topipari.com/){: target="_blank" rel="noopener noreferrer"}, and [Chad Jenkins](https://ocj.name/){: target="_blank" rel="noopener noreferrer"} +- [University of Michigan - ROB 498-002 / 599-009: Deep Learning for Robot Perception](/w23/){: target="_blank" rel="noopener noreferrer"} instructed by [Anthony Opipari](https://topipari.com/){: target="_blank" rel="noopener noreferrer"}, [Chad Jenkins](https://ocj.name/){: target="_blank" rel="noopener noreferrer"}, and [Karthik Desingh](https://karthikdesingh.com/){: target="_blank" rel="noopener noreferrer"} - [University of Michigan - EECS 498-007 / 598-005: Deep Learning for Computer Vision](https://web.eecs.umich.edu/~justincj/teaching/eecs498/WI2022/){: target="_blank" rel="noopener noreferrer"} instructed by [Justin Johnson](https://web.eecs.umich.edu/~justincj/){: target="_blank" rel="noopener noreferrer"} - [Stanford - CS231n: Deep Learning for Computer Vision](http://cs231n.stanford.edu/index.html){: target="_blank" rel="noopener noreferrer"} instructed by [Fei-Fei Li](https://profiles.stanford.edu/fei-fei-li){: target="_blank" rel="noopener noreferrer"} and [Andrej Karpathy](https://karpathy.ai/){: target="_blank" rel="noopener noreferrer"} --- -
-
# Instructors +
{% assign instructors = site.staffers | where: 'role', 'Instructor' |sort: 'order' %} {% for staffer in instructors %} @@ -38,19 +120,58 @@ This course builds on and is indebted to these existing courses (as a β€œstar” {% endfor %}
+ +# Graduate Student Instructor +
+ +{% assign gsis = site.staffers | where: 'role', 'Graduate Student Instructor' |sort: 'order' %} +{% assign num_gsis = gsis | size %} +{% if num_gsis != 0 %} + +{% for staffer in gsis %} +{{ staffer }} +{% endfor %} +{% endif %} + +
+ +# Instructional Assistants
-# Research Associate +{% assign ias = site.staffers | where: 'role', 'Instructional Assistant' | sort: 'order' %} +{% for staffer in ias %} +{{ staffer }} +{% endfor %} + +
-{% assign research_associates = site.staffers | where: 'role', 'Research Associate' %} -{% assign num_research_associates = research_associates | size %} -{% if num_research_associates != 0 %} +# Advising Faculty +
-{% for staffer in research_associates %} +{% assign advising_faculty = site.staffers | where: 'role', 'Advising Faculty' %} +{% assign num_advising_faculty = advising_faculty | size %} +{% if num_advising_faculty != 0 %} + +{% for staffer in advising_faculty %} {{ staffer }} {% endfor %} {% endif %}
+ +--- + +# Office Hours Schedule +{: #weekly-schedule } + +
+{: .highlight } +**The schedule of instructor office hours, including the in-person locations, is provided in the following Google calendar.** +
+ +
+{: .note } +**For accessing office hours virtually, please refer to the calendar for each instructor's preferred Zoom link. If no Zoom link is listed, please join their office hours queue and share your personal Zoom link as your location.**
+ diff --git a/office-hours.md b/office-hours.md new file mode 100644 index 0000000..0e1595d --- /dev/null +++ b/office-hours.md @@ -0,0 +1,24 @@ +--- +layout: page +title: Office Hours +description: Weekly meeting schedule for the course, Deep Learning for Robot Perception at the University of Michigan. +nav_order: 4 +--- + + + + +# Office Hours Schedule +{: #weekly-schedule } + +
+{: .highlight } +**The schedule of instructor office hours, including the in-person locations, is provided in the following Google calendar.** +
+ +
+{: .note } +**For accessing office hours virtually, please refer to the calendar for each instructor's preferred Zoom link. If no Zoom link is listed, please join their office hours queue and share your personal Zoom link as your location.** +
+ + \ No newline at end of file diff --git a/papers/index.md b/papers/index.md index e425313..9c1312d 100644 --- a/papers/index.md +++ b/papers/index.md @@ -2,7 +2,7 @@ layout: page title: Papers description: Collection of deep learning research papers with coverage in perception and associated robotic tasks. -nav_order: 4 +nav_order: 5 has_children: false has_toc: true --- @@ -10,7 +10,7 @@ has_toc: true # Deep Learning Research Papers for Robot Perception {:.no_toc} -A collection of deep learning research papers with coverage in perception and associated robotic tasks. Within each research area outlined below, the course staff has identified a *core* and *extended* set of research papers. The *core* set of papers will form the basis of our seminar-style lectures starting in [week 8](/calendar/#week-8-3d-perception). The *extended* set provides additional coverage of even more exciting work being done within each area. +A collection of deep learning research papers with coverage in perception and associated robotic tasks. Within each research area outlined below, the course staff has identified a *core* and *extended* set of research papers. The *core* set of papers will form the basis of our seminar-style lectures starting in [week 8]({{ site.baseurl }}/calendar/#week-8-3d-perception). The *extended* set provides additional coverage of even more exciting work being done within each area. --- @@ -24,7 +24,6 @@ A collection of deep learning research papers with coverage in perception and as # RGB-D Architectures -[Scheduled Week 8, Lec 14](/calendar/#lec-14) ### Core List {: .no_toc } @@ -58,7 +57,6 @@ A collection of deep learning research papers with coverage in perception and as # Point Cloud Processing -[Scheduled Week 8, Lec 15](/calendar/#lec-15) ### Core List {: .no_toc } @@ -89,7 +87,6 @@ A collection of deep learning research papers with coverage in perception and as # Object Pose, Geometry, SDF, Implicit surfaces -[Scheduled Week 9, Lec 16](/calendar/#lec-16) ### Core List {: .no_toc } @@ -117,7 +114,6 @@ A collection of deep learning research papers with coverage in perception and as # Dense Descriptors, Category-level Representations -[Scheduled Week 9, Lec 17](/calendar/#lec-17) ### Core List {: .no_toc } @@ -143,7 +139,6 @@ A collection of deep learning research papers with coverage in perception and as # Recurrent Networks and Object Tracking -[Scheduled Week 10, Lec 18](/calendar/#lec-18) ### Core List {: .no_toc } @@ -168,9 +163,8 @@ A collection of deep learning research papers with coverage in perception and as - [RNNPose: Recurrent 6-DoF Object Pose Refinement with Robust Correspondence Field Estimation and Pose Optimization](https://arxiv.org/abs/2203.12870v3){: target="_blank" rel="noopener noreferrer"}, Xu et al., 2022 -# Visual Odometry and Localization -[Scheduled Week 10, Lec 19](/calendar/#lec-19) +# Visual Odometry and Localization ### Core List {: .no_toc } @@ -210,8 +204,6 @@ A collection of deep learning research papers with coverage in perception and as # Semantic Scene Graphs and Explicit Representations -[Scheduled Week 11, Lec 20](/calendar/#lec-20) - ### Core List {: .no_toc } @@ -242,8 +234,6 @@ A collection of deep learning research papers with coverage in perception and as # Neural Radiance Fields and Implicit Representations -[Scheduled Week 11, Lec 21](/calendar/#lec-21) - ### Core List {: .no_toc } @@ -253,9 +243,9 @@ A collection of deep learning research papers with coverage in perception and as 3. 
[NeRF-SLAM: Real-Time Dense Monocular SLAM with Neural Radiance Fields](https://arxiv.org/abs/2210.13641){: target="_blank" rel="noopener noreferrer"}, Rosinol et al., 2022 -4. [NeRF-Supervision: Learning Dense Object Descriptors from Neural Radiance Fields](https://yenchenlin.me/nerf-supervision/){: target="_blank" rel="noopener noreferrer"}, Yen-Chen et al., 2022 +4. [NARF22: Neural Articulated Radiance Fields for Configuration-Aware Rendering](https://arxiv.org/abs/2210.01166){: target="_blank" rel="noopener noreferrer"}, Lewis et al., 2022 -5. [NARF22: Neural Articulated Radiance Fields for Configuration-Aware Rendering](https://arxiv.org/abs/2210.01166){: target="_blank" rel="noopener noreferrer"}, Lewis et al., 2022 +5. [Distilled Feature Fields Enable Few-Shot Language-Guided Manipulation](https://f3rm.csail.mit.edu){: target="_blank" rel="noopener noreferrer"}, Shen et al., 2023 ### Extended List @@ -281,11 +271,12 @@ A collection of deep learning research papers with coverage in perception and as - [Block-NeRF: Scalable Large Scene Neural View Synthesis](https://arxiv.org/abs/2202.05263){: target="_blank" rel="noopener noreferrer"}, Tancik et al., 2022 +- [NeRF-Supervision: Learning Dense Object Descriptors from Neural Radiance Fields](https://yenchenlin.me/nerf-supervision/){: target="_blank" rel="noopener noreferrer"}, Yen-Chen et al., 2022 +- [Language Embedded Radiance Fields](https://www.lerf.io){: target="_blank" rel="noopener noreferrer"}, Kerr et al., 2023 -# Datasets -[Scheduled Week 12, Lec 22](/calendar/#lec-22) +# Datasets ### Core List {: .no_toc } @@ -352,6 +343,8 @@ A collection of deep learning research papers with coverage in perception and as - [NVIDIA Isaac Sim](https://developer.nvidia.com/isaac-sim){: target="_blank" rel="noopener noreferrer"} +- [CARLA: An Open Urban Driving Simulator](https://carla.org){: target="_blank" rel="noopener noreferrer"}, Dosovitskiy et al., 2017 + - [SoftGym: Benchmarking Deep Reinforcement Learning for Deformable Object Manipulation](https://arxiv.org/abs/2011.07215){: target="_blank" rel="noopener noreferrer"}, Lin et al., 2020 - [ManiSkill2: A Unified Benchmark for Generalizable Manipulation Skills](https://arxiv.org/abs/2302.04659){: target="_blank" rel="noopener noreferrer"}, Gu et al., 2023 @@ -359,8 +352,6 @@ A collection of deep learning research papers with coverage in perception and as # Self-Supervised Learning -[Scheduled Week 12, Lec 23](/calendar/#lec-23) - ### Core List {: .no_toc } @@ -384,8 +375,6 @@ A collection of deep learning research papers with coverage in perception and as # Grasp Pose Detection -[Scheduled Week 13, Lec 24](/calendar/#lec-24) - ### Core List {: .no_toc } @@ -416,8 +405,6 @@ A collection of deep learning research papers with coverage in perception and as # Tactile Perception for Grasping and Manipulation -[Scheduled Week 13, Lec 25](/calendar/#lec-25) - ### Core List {: .no_toc } @@ -454,9 +441,7 @@ A collection of deep learning research papers with coverage in perception and as - [Learning to Grasp the Ungraspable with Emergent Extrinsic Dexterity](https://arxiv.org/abs/2211.01500){: target="_blank" rel="noopener noreferrer"}, Zhou and Held, 2022 -# Pre-training for Robot Manipulation and Transformer Architectures - -[Scheduled Week 14, Lec 26](/calendar/#lec-26) +# Pre-training for Robot Manipulation ### Core List {: .no_toc } @@ -497,10 +482,33 @@ A collection of deep learning research papers with coverage in perception and as +# Perception Beyond Vision -# More Frontiers +### Specialized 
Sensors +{: .no_toc } + +- [Pigeons (Columba livia) as Trainable Observers of Pathology and Radiology Breast Cancer Images](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0141357){: target="_blank" rel="noopener noreferrer"}, Levenson et al., 2015 + +- [Automatic color correction for 3D reconstruction of underwater scenes](https://ieeexplore.ieee.org/abstract/document/7989601){: target="_blank" rel="noopener noreferrer"}, Skinner et al., 2017 + +- [GelSight: High-Resolution Robot Tactile Sensors for Estimating Geometry and Force](https://www.mdpi.com/1424-8220/17/12/2762){: target="_blank" rel="noopener noreferrer"}, Yuan et al., 2017 + +- [Classification of Household Materials via Spectroscopy](https://arxiv.org/abs/1805.04051){: target="_blank" rel="noopener noreferrer"}, Erickson et al., 2018 + +- [Through-Wall Human Pose Estimation Using Radio Signals](https://openaccess.thecvf.com/content_cvpr_2018/papers/Zhao_Through-Wall_Human_Pose_CVPR_2018_paper.pdf){: target="_blank" rel="noopener noreferrer"}, Zhao et al., 2018 + +- [A bio-hybrid odor-guided autonomous palm-sized air vehicle](https://iopscience.iop.org/article/10.1088/1748-3190/abbd81){: target="_blank" rel="noopener noreferrer"}, Anderson et al., 2020 + +- [Event-based, Direct Camera Tracking from a Photometric 3D Map using Nonlinear Optimization](https://rpg.ifi.uzh.ch/docs/ICRA19_Bryner.pdf){: target="_blank" rel="noopener noreferrer"}, Bryner et al., 2019 -[Scheduled Week 14, Lec 27](/calendar/#lec-27) +- [SoundSpaces: Audio-Visual Navigation in 3D Environments](https://arxiv.org/abs/1912.11474){: target="_blank" rel="noopener noreferrer"}, Chen et al., 2019 + +- [Neural Implicit Surface Reconstruction using Imaging Sonar](https://arxiv.org/abs/2209.08221){: target="_blank" rel="noopener noreferrer"}, Qadri et al., 2022 + + + + +# More Frontiers ### Interpreting Deep Learning Models @@ -578,24 +586,6 @@ A collection of deep learning research papers with coverage in perception and as - [HexPlane: A Fast Representation for Dynamic Scenes](https://arxiv.org/abs/2301.09632){: target="_blank" rel="noopener noreferrer"}, Cao and Johnson, 2023 -### Specialized Sensors - -- [Pigeons (Columba livia) as Trainable Observers of Pathology and Radiology Breast Cancer Images](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0141357){: target="_blank" rel="noopener noreferrer"}, Levenson et al., 2015 - -- [Automatic color correction for 3D reconstruction of underwater scenes](https://ieeexplore.ieee.org/abstract/document/7989601){: target="_blank" rel="noopener noreferrer"}, Skinner et al., 2017 - -- [GelSight: High-Resolution Robot Tactile Sensors for Estimating Geometry and Force](https://www.mdpi.com/1424-8220/17/12/2762){: target="_blank" rel="noopener noreferrer"}, Yuan et al., 2017 - -- [Classification of Household Materials via Spectroscopy](https://arxiv.org/abs/1805.04051){: target="_blank" rel="noopener noreferrer"}, Erickson et al., 2018 - -- [Through-Wall Human Pose Estimation Using Radio Signals](https://openaccess.thecvf.com/content_cvpr_2018/papers/Zhao_Through-Wall_Human_Pose_CVPR_2018_paper.pdf){: target="_blank" rel="noopener noreferrer"}, Zhao et al., 2018 - -- [Event-based, Direct Camera Tracking from a Photometric 3D Map using Nonlinear Optimization](https://rpg.ifi.uzh.ch/docs/ICRA19_Bryner.pdf){: target="_blank" rel="noopener noreferrer"}, Bryner et al., 2019 - -- [SoundSpaces: Audio-Visual Navigation in 3D Environments](https://arxiv.org/abs/1912.11474){: target="_blank" 
rel="noopener noreferrer"}, Chen et al., 2019 - -- [Neural Implicit Surface Reconstruction using Imaging Sonar](https://arxiv.org/abs/2209.08221){: target="_blank" rel="noopener noreferrer"}, Qadri et al., 2022 - ### Beyond 2D Convolutions @@ -606,6 +596,8 @@ A collection of deep learning research papers with coverage in perception and as ### Reinforcement Learning +- [Deep Reinforcement Learning from Human Preferences](https://proceedings.neurips.cc/paper_files/paper/2017/file/d5e2c0adad503c91f91df240d0cd4e49-Paper.pdf){: target="_blank" rel="noopener noreferrer"}, Christiano et al., 2017 + - [Understanding RL Vision](https://distill.pub/2020/understanding-rl-vision/){: target="_blank" rel="noopener noreferrer"}, Hilton et al., 2020 diff --git a/projects/finalproject/extension.md b/projects/finalproject/extension.md new file mode 100644 index 0000000..41aa3dd --- /dev/null +++ b/projects/finalproject/extension.md @@ -0,0 +1,42 @@ +--- +layout: page +title: Extension +parent: Final Project +grand_parent: Projects +nav_order: 2 +description: Specification of the final project for DeepRob at the University of Michigan. +--- + + +# 2   Paper Reproduction & Extension +{: #paper-reproduction } +The goal for this deliverable is to gain experience crafting a small research project. In this project your team will work to reproduce an existing state of the art deep learning algorithm then propose, develop and evaluate a technical extension that improves some aspect of the existing model (i.e. improved accuracy, reduced compute budget, increased generalization, etc.). + + - [Useful Resources](#extension-resources) + - [Algorithmic Extension Deadline](#extension-deadline) + +### Paper Reproduction +In this part of the deliverable, your team will practice the skills needed to reproduce the results of existing research. For this deliverable, your team should choose a paper related to deep learning and robot perception that you find interesting and reproduce at least one quantitative or qualitative result published in the original paper. + + +### Algorithmic Extension +In this part of the deliverable, your team will propose and develop a new component of the algorithm you reproduced. The expectation is that your extension will build on the paper reproduction deliverable by using your reproduction as a starting point and baseline for the extension. For this extension, your team should plan how to analyze your proposed extension, and execute on the plan by implementing and evaluating the ideas you propose in the [project proposal]({{ site.baseurl }}/projects/finalproject/proposal/). + + + + + +## Useful Resources +{: #extension-resources } + + - [The Seminar Papers]({{ site.baseurl }}/papers/): The collection of seminar papers was chosen to give broad coverage within deep learning for robot perception. The papers from this list can serve as great subjects for your final project. + + - [Papers With Code](https://paperswithcode.com){: target="_blank" rel="noopener noreferrer"}: A repository tracking public codebases associated with published papers and datasets in machine learning. + + + +## Paper Reproduction & Extension Deadline +{: #extension-deadline } + +**The algorithmic extension is due Monday, April 28th by 11:59PM EST.** Students in DeepRob should submit their paper reproduction and algorithmic extension along with the other final project deliverables (paper reproduction and written report) as a ZIP file via canvas. 
**Your submission should be documented and organized such that the course staff can replicate your extension and reported results.** + diff --git a/projects/finalproject/index.md b/projects/finalproject/index.md new file mode 100644 index 0000000..7b47218 --- /dev/null +++ b/projects/finalproject/index.md @@ -0,0 +1,45 @@ +--- +layout: page +title: Final Project +parent: Projects +nav_order: 6 +has_children: true +has_toc: false +description: Specification of the final project for DeepRob at the University of Michigan. +--- + + + +# Final Project + +## Overview + +The objective of this project is to build skills needed to understand and lead research projects in deep learning for robot perception. In this project you will read state of the art deep learning papers, reproduce results from an existing paper and finally propose then develop an extension to the existing results. + +### The goals for this project are as follows: + - Practice reading deep learning papers in your areas of interest. + - Analyze the published motivations, results, and methodologies in deep learning papers from the perspective of a roboticist. + - Propose an algorithmic extension to a state of the art deep learning model applied to robotic perception. + - Present your proposed project, clearly motivating how it is relevant to robotic perception. + - Practice reproducing the results of published work. Build an understanding of what challenges are common when reproducing published results. + - Document the results of your work in a scholarly format so that they can be shared with peers and move the field forward. + +### Project Deliverables + + 1. [Proposal Presentation]({{ site.baseurl }}/projects/finalproject/proposal/) + 2. [Algorithmic Reproduction & Extension]({{ site.baseurl }}/projects/finalproject/extension) + 3. [Written Report]({{ site.baseurl }}/projects/finalproject/report) + 4. [Project Showcase]({{ site.baseurl }}/projects/finalproject/showcase) + + +### Grading + +Overall, the final project is worth 23% of the course grade. The breakdown by each deliverable is shown below: + + 1. Proposal Presentation: 5% + 2. Final Report and Code: 15% + 3. Showcase (Video, Website, etc.): 3% + + + + diff --git a/projects/finalproject/proposal.md b/projects/finalproject/proposal.md new file mode 100644 index 0000000..c219eb6 --- /dev/null +++ b/projects/finalproject/proposal.md @@ -0,0 +1,70 @@ +--- +layout: page +title: Proposal +parent: Final Project +grand_parent: Projects +nav_order: 1 +description: Specification of the final project for DeepRob at the University of Michigan. +--- + +# 1   Project Proposal Presentation +{: #paper-presentation } + +The goals for this deliverable are to practice preparing and delivering a concise presentation in which you and your team will propose a research project related to Deep Learning and Robot Perception. Project proposal presentations are scheduled to take place during the seminar portion of the course ([Discussion 10]({{ site.baseurl }}/calendar/#dis-10)). 
+ + - [The structure of a presentation in robotics research](#presentation-structure) + - [Expected Presentation Format](#presentation-format) + - [Presentation Deadline](#presentation-deadline) + - [Useful Resources](#presentation-resources) + + + + +## The structure of a robotics research proposal presentation +{: #presentation-structure } +Quality research presentations will capture an audience's attention, motivate them to take an interest in the challenge at hand, demonstrate what knowledge has been or is proposed to be generated to solve the challenge, and encourage the audience to extend the presented ideas towards new challenges. For maximum effect, research presentations should be correct, clear, concise, and broadly understandable. Given the challenge of achieving all this, we suggest developing your presentations using the following section structure: + +### 1   Hot Start +{: #presentation-hot-start } +The first task of a presenter is to command the attention and interest of their audience. Use a hot start to pique the audience's interest in your talk. There will always be distractions that a presenter must compete with to keep the audience's focus, even after a successful hot start. + +### 2   Value Proposition +{: #presentation-value-proposition } +A clear value proposition will motivate the audience to keep paying attention beyond the hot start. This section of the presentation establishes what benefits can be realized by solving an existing challenge or technical problem. The proposition can be framed in language such as, "if we can solve \<challenge\>, we'll be able to realize \<benefit\>." + +### 3   Background +{: #presentation-background } +A motivated audience will then want to know how the challenge being presented can be solved. This section of the talk should provide background on what factors have made the challenge difficult in the past. How have others tried to solve this problem? If it is a new problem, what approaches might be applicable or why are none applicable? + +### 4   Proposed Approach +{: #presentation-approach } +After setting up background context on related work, presenters should describe their key insights and the ideas they propose to develop and evaluate as a potential solution. The proposed approach may involve combining multiple techniques in a new way, in-depth analysis of an existing approach that isn't fully understood, providing new datasets for benchmarking, or some other work that will contribute to the stated project motivation. + +### 5   Project Plan +{: #presentation-resolution } +Given a proposed approach, the presenter should lay out a course of action to achieve their research project's goals. Ideally, the plan will describe a timetable with project milestones and expected results. + + + +## Expected Presentation Format +{: #presentation-format } + +**Student teams in DeepRob should prepare a 5-minute slide-based oral presentation for their proposed project as part of the final project.** The presentation should include the following structure: motivation relating to robotics and deep learning, background on the problem being addressed, the value proposition, approach and methods, and project timeline. 
**Students are expected to use the provided [DeepRob Keynote Theme]({{ site.baseurl }}/assets/slides/templates/DeepRob_Paper_Presentation.key) or the provided [DeepRob PowerPoint Template]({{ site.baseurl }}/assets/slides/templates/DeepRob_Paper_Presentation.pptx) for styling your slides.** + + +## Presentation Deadline +{: #presentation-deadline } + +**Students in DeepRob should submit their presentation slides formatted as a PDF at least 3 days prior to the scheduled proposal presentation date of [April 1, 2025]({{ site.baseurl }}/calendar/#dis-10).** Students should submit a copy of their slides as a PDF file via the course Canvas. + + +## Useful Resources +{: #presentation-resources } + + - [How to Give a Great Research Talk](https://youtu.be/ot_McoYlwUo){: target="_blank" rel="noopener noreferrer"}: Advice from Simon Peyton Jones, Engineering Fellow at Epic Games. + + - Recent robotics conferences are a great resource for finding recorded research presentations. For example, the recorded oral presentations from [ICRA 2022](https://events.infovaya.com/event?id=88){: target="_blank" rel="noopener noreferrer"} and [CoRL 2022](https://corl2022.org/videos-oralsessions/){: target="_blank" rel="noopener noreferrer"} can be found online. + + - [Oral Presentation Advice](https://pages.cs.wisc.edu/~markhill/conference-talk.html){: target="_blank" rel="noopener noreferrer"}: Advice from Professor Mark D. Hill at the University of Wisconsin. diff --git a/projects/finalproject/report.md b/projects/finalproject/report.md new file mode 100644 index 0000000..ac68a2a --- /dev/null +++ b/projects/finalproject/report.md @@ -0,0 +1,41 @@ +--- +layout: page +title: Report +parent: Final Project +grand_parent: Projects +nav_order: 3 +description: Specification of the final project for DeepRob at the University of Michigan. +--- + +# 3   Written Report +{: #written-report} + +The goal for this deliverable is to share the findings of your research in the form of a written paper with peers in the field. + - [Report Format](#report-format) + - [Useful Resources](#report-resources) + - [Report Deadline](#report-deadline) + +## Expected Report Format +{: #report-format } + +**Student teams in DeepRob should complete a written report documenting their paper reproduction and algorithmic extension as part of the final project.** The project report should include the following sections: a paper abstract, an introduction, a related work section, a section describing your algorithmic extension, your experimental setup and results, and finally a conclusion. Final reports are expected to be 4-6 pages, not including references. **Students are expected to typeset their reports using LaTeX in IEEE conference style.** A [LaTeX report template]({{ site.baseurl }}/assets/projects/DeepRobProjectReport.zip) is provided for your convenience. You may write your report collaboratively using online LaTeX tools, such as [Overleaf](https://www.overleaf.com/){: target="_blank" rel="noopener noreferrer"}. + + + +## Useful Resources +{: #report-resources } + + - [How to Write a Great Research Paper](https://youtu.be/WP-FkUaOcOM){: target="_blank" rel="noopener noreferrer"}: Advice from Simon Peyton Jones, Engineering Fellow at Epic Games. + + - [Save the cat!](https://savethecat.com/products/books/save-the-cat-the-last-book-on-screenwriting-youll-ever-need){: target="_blank" rel="noopener noreferrer"}: The last book on screenwriting you’ll ever need. 
+ + - [How to create a better research poster in less time](https://youtu.be/1RwJbhkCA58){: target="_blank" rel="noopener noreferrer"}: Advice on how to create effective research posters for when your paper has been accepted to a conference. Crucially, your posters should capture the viewers' attention and convey your key findings clearly. Also take a look at [part II](https://youtu.be/SYk29tnxASs){: target="_blank" rel="noopener noreferrer"}! + + +## Report Deadline +{: #report-deadline } + +**The written report is due Monday, April 28th by 11:59PM EST.** +Students in DeepRob should submit the following under the Canvas "Final Project" assignment: +1. An IEEE-style written report (with related work, algorithmic reproduction and extension method descriptions and results). +2. Link to code/datasets. This can be included as a section in your written report. diff --git a/projects/finalproject/showcase.md b/projects/finalproject/showcase.md new file mode 100644 index 0000000..8e00806 --- /dev/null +++ b/projects/finalproject/showcase.md @@ -0,0 +1,29 @@ +--- +layout: page +title: Showcase +parent: Final Project +grand_parent: Projects +nav_order: 4 +description: Specification of the final project for DeepRob at the University of Michigan. +--- + + +# 4   Project Showcase +{: #showcase} +The goal for this deliverable is to share the outcome of your project with your classmates and optionally the broader research community. The aim is to develop our skills for presenting our work in a compelling, precise, honest, and convincing way to others who can benefit from the ideas we produce. + + - [Expected Format](#showcase-format) + - [Showcase Deadline](#showcase-deadline) + + +## Expected Showcase Format +{: #showcase-format } + + - Webpage option: If you would like to highlight your project with a webpage that can be included in portfolios or linked on social media, for example as [illustrated here]({{ site.baseurl }}/reports/example/), contact Anthony to help with formatting. + - Video option: This should be a self-contained video (i.e., please include the title, group members' names, and a brief description of methods and results). There is no limit on video length, but it should describe your project clearly. Please make sure the instructional team has access to your video. + - Poster option: More details coming. + + +## Showcase Deadline +{: #showcase-deadline } +**The project showcase will take place in discussion section on [April 22nd]({{ site.baseurl }}/calendar/#dis-13).** \ No newline at end of file diff --git a/projects/index.md b/projects/index.md index b4b1c02..976adde 100644 --- a/projects/index.md +++ b/projects/index.md @@ -3,7 +3,7 @@ layout: page title: Projects description: >- Course project outline. 
-nav_order: 6 +nav_order: 8 has_children: true has_toc: false --- @@ -20,27 +20,29 @@ has_toc: false --- --> -## [Project 0](/projects/) +## [Project 0]({{ site.baseurl }}/projects/project0/) Introduction to Python, PyTorch and Google Colab -## [Project 1](/projects/) +## [Project 1]({{ site.baseurl }}/projects/project1/) Classification using K-Nearest Neighbors and Linear Models -## [Project 2](/projects/) +## [Project 2]({{ site.baseurl }}/projects/project2/) -Classification using Fully-connected Neural Networks +Classification using Fully Connected Neural Networks -## [Project 3](/projects/) +## [Project 3]({{ site.baseurl }}/projects/project3/) Classification and Detection using Convolutional Neural Networks -## [Project 4](/projects/) + -Segmentation and Pose Estimation with Deep Learning +## [Project 4]({{ site.baseurl }}/projects/project4/) -## [Final Project](/projects/) +Segmentation, Pose Estimation, and Vision Transformers + +## Final Project Published paper review, presentation, reproduction, extension and report diff --git a/projects/project0.md b/projects/project0.md new file mode 100644 index 0000000..5f47d76 --- /dev/null +++ b/projects/project0.md @@ -0,0 +1,49 @@ +--- +layout: page +title: Project 0 +parent: Projects +nav_order: 1 +description: Specification of project 0 for DeepRob at the University of Michigan. +--- + +# Project 0 + +## Overview +The objective of the first project is to gain experience working with the [Python](https://www.python.org){: target="_blank" rel="noopener noreferrer"} scripting languange and the [PyTorch](https://pytorch.org){: target="_blank" rel="noopener noreferrer"} machine learning framework using the [Google Colab](https://colab.research.google.com/){: target="_blank" rel="noopener noreferrer"} development environment. In this project you will implement a collection of functions using core functionality of PyTorch Tensor objects. + + +## Instructions + +1. Download the project starter code + - [Project 0 starter code: P0.zip]({{ site.baseurl }}/assets/projects/P0.zip) + +2. Unzip the starter code and upload to Google Drive + - Once unzipped, you should find a root directory titled 'P0'. The 'P0' directory contains all starter code and files needed to complete this project. Please upload the 'P0' directory to your [Google Drive](https://drive.google.com/){: target="_blank" rel="noopener noreferrer"}. + +3. Open the `*.ipynb` and `*.py` files and implement features + - We recommend implementing the features in a Google Colab environment. The Colab development environment can be accessed by double-clicking on each `*.ipynb` and `*.py` file within your Drive. Instructions for each feature are included in the `pytorch101.ipynb` file. + + - While working on the project, keep the following in mind: + + - The notebook and the python file have clearly marked blocks where you are expected to write code. Do not write or modify any code outside of these blocks. + - Do not add or delete cells from the notebook. You may add new cells to perform scratch computations, but you should delete them before submitting your work. + - Run all cells, and do not clear out the outputs, before submitting. You will only get credit for code that has been run. + - To avoid experiencing Colab usage limits, save and close your notebooks once finished working. + +4. Submit your implementation for Autograder feedback + - Once you have implemented a portion of the required features, you may submit your work for feedback from the Autograder. 
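As an aside for readers new to PyTorch, the short snippet below, which is separate from the P0 starter code, shows the kind of core tensor functionality the Project 0 overview above refers to: construction, indexing, reshaping, broadcasting, and device placement.

```python
# Standalone PyTorch warm-up, separate from the P0 starter code.
import torch

# Construction and dtype/shape inspection
x = torch.arange(12, dtype=torch.float32)   # tensor([0., 1., ..., 11.])
print(x.shape, x.dtype)

# Reshaping and indexing
m = x.reshape(3, 4)
print(m[1, 2])        # single element (row 1, column 2)
print(m[:, 0])        # first column of every row

# Elementwise math and broadcasting
row_means = m.mean(dim=1, keepdim=True)     # shape (3, 1)
centered = m - row_means                    # broadcast across columns
print(centered.sum(dim=1))                  # approximately zero per row

# Matrix multiplication and moving to GPU when available
y = m @ m.t()                               # (3, 4) x (4, 3) -> (3, 3)
device = "cuda" if torch.cuda.is_available() else "cpu"
print(y.to(device).shape)
```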
To receive feedback, download your `*.ipynb` and `*.py` files then upload them to the [Project 0 Autograder](https://autograder.io/web/project/2896){: target="_blank" rel="noopener noreferrer"}. You may submit to the Autograder for feedback up to 5 times per day. + +5. Download final implementation + - After implementing all features, save your work and download the completed `*.ipynb` and `*.py` files. + - The last cell of the `pytorch101.ipynb` notebook will generate a `uniqueid_umid_P0.zip` file. The zip file should include `pytorch101.ipynb` and `pytorch101.py` for this assignment. + +6. Submit your python and notebook files for grading + - Upload your files to the [Autograder](https://autograder.io/web/project/2896){: target="_blank" rel="noopener noreferrer"} for grading consideration. Your highest score will be used for final grades. + +## Deadline + +This project is due on Sunday, January 19th at 11:59pm EST. We suggest starting as soon as possible. + +## Grading + +This project will be graded by the [Autograder](https://autograder.io/web/project/2896){: target="_blank" rel="noopener noreferrer"}. The project is worth a total of 85 points. You may submit to the Autograder for feedback up to 5 times per day. \ No newline at end of file diff --git a/projects/project1.md b/projects/project1.md new file mode 100644 index 0000000..023af81 --- /dev/null +++ b/projects/project1.md @@ -0,0 +1,60 @@ +--- +layout: page +title: Project 1 +parent: Projects +nav_order: 2 +description: Specification of project 1 for DeepRob at the University of Michigan. +--- + +# Project 1 + +## Overview +The objective of this project is to gain experience building a machine learning pipeline that can be used to train and evaluate image classification models. In this project you will implement a set of classification models then apply them to a dataset of images in the context of domestic service robots. + +### The goals for this project are as follows: + - Implement a K-Nearest Neighbors classifier. + - Implement a Multiclass Support Vector Machine classifier. + - Implement a Softmax classifier. + - Understand the differences and tradeoffs between each of these classifiers. + - Understand the characteristics of instance-level classification using the [PROPS Classification Dataset]({{ site.baseurl }}/datasets/props-classification/). + - Practice with cross validating your machine learning models. + + +## Instructions + +1. Download the project starter code + - [Project 1 starter code: P1.zip]({{ site.baseurl }}/assets/projects/P1.zip) + +2. Unzip the starter code and upload to Google Drive + - Once unzipped, you should find a root directory titled 'P1'. The 'P1' directory contains all starter code and files needed to complete this project. Please upload the 'P1' directory to your [Google Drive](https://drive.google.com/){: target="_blank" rel="noopener noreferrer"}. + +3. Open the `*.ipynb` and `*.py` files and implement features + - We recommend implementing the features in a Google Colab environment. The Colab development environment can be accessed by double-clicking on each `*.ipynb` and `*.py` file within your Drive. Instructions for each feature are included in the `knn.ipynb` and `linear_classifier.ipynb` files. + + - We suggest starting by implementing the required features as they appear in the `knn.ipynb` notebook, which can be thought of as part 1 of the project. Then work through the `linear_classifier.ipynb` notebook as part 2 of the project. 
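As an aside before the remaining steps, the sketch below illustrates the vectorized distance computation at the heart of the K-Nearest Neighbors classifier named in the Project 1 goals above. It is not the starter-code API: the function name, tensor shapes, and CIFAR-style image sizes are assumptions for illustration.

```python
# Vectorized pairwise squared L2 distances between test and train images,
# followed by a k-nearest-neighbor majority vote. Shapes are illustrative.
import torch

def knn_predict(x_train, y_train, x_test, k=5):
    # Flatten images to vectors: (N, 3, 32, 32) -> (N, 3072)
    train = x_train.reshape(x_train.shape[0], -1)
    test = x_test.reshape(x_test.shape[0], -1)

    # ||a - b||^2 = ||a||^2 - 2 a.b + ||b||^2, computed for all pairs at once
    d2 = (
        test.pow(2).sum(dim=1, keepdim=True)
        - 2.0 * test @ train.t()
        + train.pow(2).sum(dim=1)
    )                                        # (num_test, num_train)

    # Indices of the k closest training examples for each test image
    knn_idx = d2.topk(k, dim=1, largest=False).indices
    # Majority vote over the neighbors' labels
    votes = y_train[knn_idx]                 # (num_test, k)
    return votes.mode(dim=1).values

# Tiny smoke test with random data (10 classes, 3x32x32 images)
x_tr, y_tr = torch.randn(100, 3, 32, 32), torch.randint(0, 10, (100,))
x_te = torch.randn(8, 3, 32, 32)
print(knn_predict(x_tr, y_tr, x_te, k=5).shape)   # torch.Size([8])
```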
+ + - While working on the project, keep the following in mind: + + - The notebook and the python file have clearly marked blocks where you are expected to write code. Do not write or modify any code outside of these blocks. + - Do not add or delete cells from the notebook. You may add new cells to perform scratch computations, but you should delete them before submitting your work. + - Run all cells, and do not clear out the outputs, before submitting. You will only get credit for code that has been run. + - To avoid experiencing Colab usage limits, save and close your notebooks once finished working. + +4. Submit your implementation for Autograder feedback + - Once you have implemented a portion of the required features, you may submit your work for feedback from the Autograder. To receive feedback, download your `*.ipynb` and `*.py` files then upload them to the [Project 1 Autograder](https://autograder.io/web/project/2893){: target="_blank" rel="noopener noreferrer"}. You may submit to the Autograder for feedback up to 5 times per day. + +5. Download final implementation + - After implementing all features, save your work and download the completed `*.ipynb` and `*.py` files. + - The last cell of the `linear_classifier.ipynb` notebook will generate a `uniqueid_umid_P1.zip` file. The zip file should include `knn.ipynb`, `knn.py`, `linear_classifier.ipynb`, `linear_classifier.py`, `svm_best_model.pt`, and `softmax_best_model.pt` for this assignment. + +6. Submit your python and notebook files for grading + - Upload your files to the [Autograder](https://autograder.io/web/project/2893){: target="_blank" rel="noopener noreferrer"} for grading consideration. Your highest score will be used for final grades. + +## Deadline + +This project is due on Sunday, February 2nd at 11:59pm EST. We suggest starting as soon as possible. + +## Grading + +This project will be graded by the [Autograder](https://autograder.io/web/project/2893){: target="_blank" rel="noopener noreferrer"}. The project is worth a total of 95 points. You may submit to the Autograder for feedback up to 5 times per day. + diff --git a/projects/project2.md b/projects/project2.md new file mode 100644 index 0000000..3f7a10b --- /dev/null +++ b/projects/project2.md @@ -0,0 +1,58 @@ +--- +layout: page +title: Project 2 +parent: Projects +nav_order: 3 +description: Specification of project 2 for DeepRob at the University of Michigan. +--- + +# Project 2 + +## Overview +The objective of this project is to gain experience building and training neural networks as multi layer perceptrons. In this project you will implement a fixed size two layer neural network and a set of generic network layers that can be used to build and train multi layer perceptrons. + +### The goals for this project are as follows: + - Implement the forward and backward pass for a two layer neural network. + - Generalize your network implementation to fully connected layers. + - Implement the forward and backward pass for a non-linear activation function (ReLU). + - Implement and understand the tradeoffs using network regularization techniques. + - Understand the characteristics of neural network based classification using the [PROPS Classification Dataset]({{ site.baseurl }}/datasets/props-classification/). + + +## Instructions + +1. Download the project starter code + - [Project 2 starter code: P2.zip]({{ site.baseurl }}/assets/projects/P2.zip) + +2. Unzip the starter code and upload to Google Drive + - Once unzipped, you should find a root directory titled 'P2'. 
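As an aside before the remaining setup steps, the snippet below shows the modular forward/backward pattern Project 2 builds around, using ReLU as the simplest example and checking the hand-written gradient against autograd. The function names and cache convention are illustrative, not the starter-code interface.

```python
# Hand-written forward and backward passes for a ReLU layer, in the modular
# "cache what the backward pass needs" style used for fully connected networks.
import torch

def relu_forward(x):
    out = x.clamp(min=0)      # elementwise max(0, x)
    cache = x                 # backward needs the original input
    return out, cache

def relu_backward(dout, cache):
    x = cache
    dx = dout * (x > 0).to(dout.dtype)   # gradient flows only where x was positive
    return dx

# Numeric check against autograd on random data
x = torch.randn(4, 5, requires_grad=True)
out, cache = relu_forward(x)
dout = torch.randn_like(out)
dx_manual = relu_backward(dout, cache)

out_auto = torch.relu(x)
out_auto.backward(dout)
print(torch.allclose(dx_manual, x.grad))   # expected: True
```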
The 'P2' directory contains all starter code and files needed to complete this project. Please upload the 'P2' directory to your [Google Drive](https://drive.google.com/){: target="_blank" rel="noopener noreferrer"}. + +3. Open the `*.ipynb` and `*.py` files and implement features + - We recommend implementing the features in a Google Colab environment. The Colab development environment can be accessed by double-clicking on each `*.ipynb` and `*.py` file within your Drive. Instructions for each feature are included in the `two_layer_net.ipynb` and `fully_connected_networks.ipynb` files. + + - We suggest starting by implementing the required features as they appear in the `two_layer_net.ipynb` notebook, which can be thought of as part 1 of the project. Then work through the `fully_connected_networks.ipynb` notebook as part 2 of the project. + + - While working on the project, keep the following in mind: + + - The notebook and the python file have clearly marked blocks where you are expected to write code. Do not write or modify any code outside of these blocks. + - Do not add or delete cells from the notebook. You may add new cells to perform scratch computations, but you should delete them before submitting your work. + - Run all cells, and do not clear out the outputs, before submitting. You will only get credit for code that has been run. + - To avoid experiencing Colab usage limits, save and close your notebooks once finished working. + +4. Submit your implementation for Autograder feedback + - Once you have implemented a portion of the required features, you may submit your work for feedback from the Autograder. To receive feedback, download your `*.ipynb` and `*.py` files then upload them to the [Project 2 Autograder](https://autograder.io/web/project/2895){: target="_blank" rel="noopener noreferrer"}. You may submit to the Autograder for feedback up to 5 times per day. + +5. Download final implementation + - After implementing all features, save your work and download the completed `*.ipynb` and `*.py` files. + - The last cell of the `fully_connected_networks.ipynb` notebook will generate a `uniqueid_umid_P2.zip` file. The zip file should include `two_layer_net.ipynb`, `two_layer_net.py`, `fully_connected_networks.ipynb`, `fully_connected_networks.py`, `nn_best_model.pt`, `best_overfit_five_layer_net.pth`, and `best_two_layer_net.pth` for this assignment. + +6. Submit your python and notebook files for grading + - Upload your files to the [Autograder](https://autograder.io/web/project/2895){: target="_blank" rel="noopener noreferrer"} for grading consideration. Your highest score will be used for final grades. + +## Deadline + +This project is due on Sunday, February 16th at 11:59pm EST. We suggest starting as soon as possible. + +## Grading + +This project will be graded by the [Autograder](https://autograder.io/web/project/2895){: target="_blank" rel="noopener noreferrer"}. The project is worth a total of 110 points. You may submit to the Autograder for feedback up to 5 times per day. diff --git a/projects/project3.md b/projects/project3.md new file mode 100644 index 0000000..7492e63 --- /dev/null +++ b/projects/project3.md @@ -0,0 +1,59 @@ +--- +layout: page +title: Project 3 +parent: Projects +nav_order: 4 +description: Specification of project 3 for DeepRob at the University of Michigan. +--- + +# Project 3 + +## Overview +The objective of this project is to gain experience building and training convolutional neural networks for classification and detection. 
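As a quick reference for the convolution arithmetic this project exercises, the snippet below applies a single 2D convolution with PyTorch's built-in functional API and confirms the output shape. It is unrelated to the starter code, which asks you to implement this computation yourself; the shapes chosen here are illustrative.

```python
# Reference convolution using PyTorch's built-in ops (useful for checking a
# hand-written implementation; shapes below are illustrative only).
import torch
import torch.nn.functional as F

x = torch.randn(2, 3, 32, 32)        # batch of 2 RGB images
w = torch.randn(8, 3, 5, 5)          # 8 filters of size 3x5x5
b = torch.zeros(8)

out = F.conv2d(x, w, b, stride=1, padding=2)   # "same" padding for a 5x5 kernel
print(out.shape)                     # torch.Size([2, 8, 32, 32])

# Output spatial size follows H_out = (H + 2*pad - kernel) // stride + 1
h_out = (32 + 2 * 2 - 5) // 1 + 1
print(h_out)                         # 32
```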
In this project you will implement a feed forward CNN for image classification and a version of [Faster R-CNN](https://arxiv.org/abs/1506.01497){: target="_blank" rel="noopener noreferrer"} for object detection. + +### The goals for this project are as follows: + - Implement the forward and backward pass for a convolutional neural network. + - Apply your network implementation to image classification. + - Observe improved classification performance using convolutions. + - Implement the [Faster R-CNN](https://arxiv.org/abs/1506.01497){: target="_blank" rel="noopener noreferrer"} architecture for object detection. + - Understand the characteristics of neural network based object detection using the [PROPS Detection Dataset]({{ site.baseurl }}/datasets/props-detection/). + + + +## Instructions + +1. Download the project starter code + - [Project 3 starter code: P3.zip]({{ site.baseurl }}/assets/projects/P3.zip) + +2. Unzip the starter code and upload to Google Drive + - Once unzipped, you should find a root directory titled 'P3'. The 'P3' directory contains all starter code and files needed to complete this project. Please upload the 'P3' directory to your [Google Drive](https://drive.google.com/){: target="_blank" rel="noopener noreferrer"}. + +3. Open the `*.ipynb` and `*.py` files and implement features + - We recommend implementing the features in a Google Colab environment. The Colab development environment can be accessed by double-clicking on each `*.ipynb` and `*.py` file within your Drive. Instructions for each feature are included in the `convolutional_networks.ipynb` and `two_stage_detector.ipynb` files. + + - We suggest starting by implementing the required features as they appear in the `convolutional_networks.ipynb` notebook, which can be thought of as part 1 of the project. Then work through the `two_stage_detector.ipynb` notebook as part 2 of the project. + + - While working on the project, keep the following in mind: + + - The notebook and the python file have clearly marked blocks where you are expected to write code. Do not write or modify any code outside of these blocks. + - Do not add or delete cells from the notebook. You may add new cells to perform scratch computations, but you should delete them before submitting your work. + - Run all cells, and do not clear out the outputs, before submitting. You will only get credit for code that has been run. + - To avoid experiencing Colab usage limits, save and close your notebooks once finished working. + +4. Submit your implementation for Autograder feedback + - Once you have implemented a portion of the required features, you may submit your work for feedback from the Autograder. To receive feedback, download your `*.ipynb` and `*.py` files then upload them to the [Project 3 Autograder](https://autograder.io/web/project/2892){: target="_blank" rel="noopener noreferrer"}. You may submit to the Autograder for feedback up to 5 times per day. + +5. Download final implementation + - After implementing all features, save your work and download the completed `*.ipynb` and `*.py` files. + - The last cell of the `two_stage_detector.ipynb` notebook will generate a `uniqueid_umid_P3.zip` file. The zip file should include `convolutional_networks.ipynb`, `convolutional_networks.py`, `two_stage_detector.ipynb`, `two_stage_detector.py`, `one_minute_deepconvnet.pth`, `overfit_deepconvnet.pth`, and `rcnn_detector.pt` for this assignment. + +6. 
Submit your python and notebook files for grading + - Upload your files to the [Autograder](https://autograder.io/web/project/2892){: target="_blank" rel="noopener noreferrer"} for grading consideration. Your highest score will be used for final grades. + +## Deadline + +This project is due on Sunday, March 9th at 11:59pm EST. We suggest starting as soon as possible. + +## Grading + +This project will be graded by the [Autograder](https://autograder.io/web/project/2892){: target="_blank" rel="noopener noreferrer"}. The project is worth a total of 90 points. You may submit to the Autograder for feedback up to 5 times per day. diff --git a/projects/project4.md new file mode 100644 index 0000000..ce8d164 --- /dev/null +++ b/projects/project4.md @@ -0,0 +1,58 @@ +--- +layout: page +title: Project 4 +parent: Projects +nav_order: 5 +description: Specification of project 4 for DeepRob at the University of Michigan. +--- + +# Project 4 + +## Overview +The objective of this project is to gain experience building and training convolutional neural networks for 6 degrees-of-freedom rigid body pose estimation. In this project you will implement a version of [PoseCNN](https://arxiv.org/abs/1711.00199){: target="_blank" rel="noopener noreferrer"} for object pose estimation. In addition, we will work to build up a Vision Transformer model for classification. + + +### The goals for this project are as follows: + - Implement the [PoseCNN](https://arxiv.org/abs/1711.00199){: target="_blank" rel="noopener noreferrer"} architecture for object pose estimation. + - Understand the characteristics of neural network based object pose estimation using the [PROPS Pose Dataset]({{ site.baseurl }}/datasets/props-pose/). + - Implement the [Vision Transformer](https://arxiv.org/abs/2010.11929){: target="_blank" rel="noopener noreferrer"} architecture for image classification. + - Gain experience reimplementing network architectures by translating from text and figure descriptions to code implementations. + + +## Instructions + +1. Download the project starter code + - [Project 4 starter code: P4.zip]({{ site.baseurl }}/assets/projects/P4.zip) + +2. Unzip the starter code and upload to Google Drive + - Once unzipped, you should find a root directory titled 'P4'. The 'P4' directory contains all starter code and files needed to complete this project. Please upload the 'P4' directory to your [Google Drive](https://drive.google.com/){: target="_blank" rel="noopener noreferrer"}. + +3. Open the `*.ipynb` and `*.py` files and implement features + - We recommend implementing the features in a Google Colab environment. The Colab development environment can be accessed by double-clicking on each `*.ipynb` and `*.py` file within your Drive. Instructions for each feature are included in the `pose_estimation.ipynb` and `vision_transformers.ipynb` files. + + - We suggest starting by implementing the required features as they appear in the `pose_estimation.ipynb` notebook. + + - While working on the project, keep the following in mind: + + - The notebook and the python file have clearly marked blocks where you are expected to write code. Do not write or modify any code outside of these blocks. + - Do not add or delete cells from the notebook. You may add new cells to perform scratch computations, but you should delete them before submitting your work. + - Run all cells, and do not clear out the outputs, before submitting. You will only get credit for code that has been run. 
+ - To avoid experiencing Colab usage limits, save and close your notebooks once finished working. + +4. Submit your implementation for Autograder feedback + - Once you have implemented a portion of the required features, you may submit your work for feedback from the Autograder. To receive feedback, download your `*.ipynb` and `*.py` files then upload them to the [Project 4 Autograder](https://autograder.io/web/project/2902){: target="_blank" rel="noopener noreferrer"}. You may submit to the Autograder for feedback up to 5 times per day. + +5. Download final implementation + - After implementing all features, save your work and download the completed `*.ipynb` and `*.py` files. + - The last cell of the `vision_transformers.ipynb` notebook will generate a `uniqueid_umid_P4.zip` file. The zip file should include `pose_estimation.ipynb`, `pose_cnn.py`, `vision_transformers.ipynb`, `vision_transformers.py`, and `one_minute_vit.pth` for this assignment. + +6. Submit your python and notebook files for grading + - Upload your files to the [Autograder](https://autograder.io/web/project/2902){: target="_blank" rel="noopener noreferrer"} for grading consideration. Your highest score will be used for final grades. + +## Deadline + +This project is due on Sunday, March 30th at 11:59pm EST. We suggest starting as soon as possible. + +## Grading + +This project will be graded by the [Autograder](https://autograder.io/web/project/2902){: target="_blank" rel="noopener noreferrer"}. The project is worth a total of 75 points. You may submit to the Autograder for feedback up to 5 times per day. \ No newline at end of file diff --git a/reports/BAGS.md b/reports/BAGS.md new file mode 100644 index 0000000..99901c8 --- /dev/null +++ b/reports/BAGS.md @@ -0,0 +1,130 @@ +--- +layout: project +parent: Reports +title: (Eye) BAGS: Bundle-Adjusting Gaussian Splatting +description: This is a final project report for DeepRob at the University of Michigan. +authors: + - name: Ruben Fonseca + social: "https://rubenfonseca4664.wixsite.com/ruben-fonseca" + affiliation: University of Michigan + - name: Sacchin Sundar + affiliation: University of Michigan +--- + + + +
+Teaser Figure +
+ + + + + +## Abstract + +This project presents an extension to the Gaussian Object framework for object-centric 3D reconstruction under uncertain camera poses. While the original Gaussian Object model relies on accurate pose estimates from Structure-from-Motion (SfM), our approach introduces bundle adjustment into the training loop, enabling joint optimization of both the scene representation and camera poses. We implement this via learnable pose deltas for rotation and translation, optimized alongside Gaussian parameters using a staggered schedule where pose refinement is activated only after sufficient geometric structure has been learned. + +Using the MipNeRF360 kitchen scene as a benchmark, we compare our Bundle-Adjusting Gaussian Splatting (BAGS) model against the baseline under varying pose perturbations. Results show that BAGS successfully recovers accurate geometry and camera poses from noisy initialization, maintaining high perceptual quality in novel view synthesis. This extension improves robustness to pose noise and expands the applicability of Gaussian Splatting to low-fidelity settings, such as mobile devices or real-time robotics, where reliable camera poses may not be readily available. + +## Introduction + +Computer vision and perception have been long-standing topics of academic interest over the past few years, and with the rise of robotics-based applications and work, have become increasingly important for imbuing functionality for such systems. Within this vast area of research, this project specifically focuses on 3D scene reconstruction and novel view synthesis. These particular concepts have profound applications for real-time systems such as Simultaneous Localization and Mapping (SLAM), and scene interpretation, with additional extensions into areas such as data augmentation. + +Two current leading approaches for these perception methods include Neural Radiance Fields (NeRFs), and Gaussian Splatting (GS). Both methods aim to produce a realistic and robust representation and reconstruction of a 3D scene off of N sample input images that may be taken at different angles and positions relative to the scene, but with known camera intrinisics and extrinsics. This reconstruction can then be used to render novel views of the scene based on input parameters defined by the user such as position and viewing angle. + +NeRFs, introduced in 2020, work by taking in N sample images, and uses one large Multi-Layer Perceptron (MLP) to overfit the training data based on a 5D continuous representation of the scene known as a Neural Radiance Field. Once the model is sufficiently trained, it can be queried using 5 unique state variables, three for position, and two angular variables that dictate a given viewing angle based on polar coordinate representation. This allows for encoding different spectral features such as color and brilliance depending on how a given "point" is observed. + +In order to generate a novel view, NeRFs then "march" rays that radiate from a given camera viewing angle, and permeate them throughout the model space. Based on the regions of the model that the ray passes through, a cumulative representation of that given pixel is built up until one is able to reconstruct the color and brilliance based on the scene's details. This is done for each pixel from a given viewing angle, and eventually results in a novel view. 
During training, this novel view synthesized by the model is compared to ground truth image representations, and the loss propagates back to adjust the weights of the MLP, thus refining the continuous representation. Positional encodings are also implemented in this model: the input features are "encoded" into a higher-dimensional space using trigonometric functions, which was shown to improve how well positional information is captured when reconstructing the scene. Based on these implementations, and breakthroughs with respect to their loss and reconstruction methods, NeRFs were able to demonstrate significant improvement over previously existing methods, yet they showed limitations with respect to real-time rendering given that the rendering scheme of the model is computationally heavy. + +Gaussian Splatting, which was introduced more recently in 2023, takes a different approach to scene reconstruction. Instead of framing the model as a continuous representation of the scene, GS opts for an explicit implementation that relies on base primitives known as Gaussian Splats [(Kerbl et al., 2023)](https://arxiv.org/abs/2308.04079){: target="_blank" rel="noopener noreferrer"}. These Splats are 3D Gaussian distributions in which the means can have arbitrary X, Y, and Z locations, and their anisotropic covariance matrices can have arbitrary values. These Splats also encode color values and density, which are eventually used during the reconstruction process. Based on an initial sparse point cloud generated by methods such as Structure from Motion (SfM), these Splats are initialized with respect to the output points and are further refined during training. Instead of updating network weights through gradient backpropagation, the model instead backpropagates to the spatial locations, covariances, and number of Splats themselves. After training is complete, a highly optimized and efficient rendering method allows the Gaussians to be combined graphically to provide a high-fidelity reconstruction of the relevant scene that can achieve real-time speeds (>= 30 fps). + +## Algorithmic Extension + +This work proposes to incorporate bundle adjustment into the Gaussian Object pipeline. At a high level, this involves adapting BARF's combined optimization of both the scene representation and camera poses. + +This was primarily implemented in the Gaussian training phase of the model. Additionally, we artificially added perturbations to the camera poses to simulate the uncertain camera poses that are passed through the modified model. + +Several modifications were made to the model in order to achieve this. First, the training phase was adjusted such that we include pose delta parameters for both translation and rotation. These are neural parameters that are optimized alongside the Gaussians themselves, and they provide the mechanism for backpropagating pose estimation errors toward refined pose estimates. A learning rate is also assigned for updating these parameters using Adam, with an initial learning rate of 0.003. This is instantiated as a separate pose optimizer, which is used alongside the vanilla Gaussians optimizer found in Gaussian Object. The initial implementation had both optimizations running from the start, but as seen in the Results section of our paper, this method produced poor performance and diverging gradients early in training. 
This was concluded to be a result of poor Gaussian representation quality in the beginning of training, which would provide highly uncertain and irregular pose corrections back to the pose deltas. + +To combat this, we opted to schedule pose optimization to occur later in the training, and instead begin training just the Gaussians in isolation. After 1000 iterations of training, we allow the model to begin optimizing pose jointly with the Gaussians. + + +## Results + +Once the model and experimental setup were ready, the vanilla Gaussian Object model was trained on the "kitchen" sub-dataset from MipNeRF360. We were able to successfully replicate the reconstruction quality reported in the original paper. We then evaluated our extended method, BAGS (Bundle-Adjusting Gaussian Splatting), under the same conditions but with noisy initial camera poses and delayed pose optimization as shown in the below figure. +
+Teaser Figure +
+ + +We evaluate both models on standard perceptual and fidelity metrics: LPIPS (↓ lower is better), PSNR (↑ higher is better), and SSIM (↑ higher is better), across different view counts (4, 6, and 9 views). The following table summarizes the performance: + +
+Teaser Figure +
+ + +As expected, our method performs slightly below the baseline when using perfect COLMAP poses, especially for low view counts. However, this tradeoff is acceptable given that BAGS remains stable under noisy initialization and progressively recovers accurate poses, both areas where the baseline method falls short. + +
+Teaser Figure +
+ + +To evaluate the robustness of BAGS to initialization noise, we conducted an ablation study by injecting increasing levels of synthetic pose perturbation into the input COLMAP poses. As shown in the above figure, we observe that while the baseline Gaussian Object model performs comparably to BAGS under low noise (5°–10°), its reconstruction quality deteriorates significantly beyond 15°, effectively failing at 20° due to its reliance on fixed poses. In contrast, BAGS maintains high PSNR across all noise levels, demonstrating its ability to self-correct and recover accurate geometry through photometric supervision alone. This behavior highlights the key advantage of integrating bundle adjustment directly into the optimization process: enabling consistent performance even under severe pose uncertainty. + +
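To make the perturbation protocol concrete, a minimal sketch of how such synthetic noise can be injected into a camera-to-world pose is shown below. This is an illustration of the idea only; the function name, noise sampling, and magnitudes are our own assumptions rather than the project's actual script.

```python
import numpy as np

# Illustrative sketch of the synthetic pose perturbation used in the ablation
# (not the project's exact code): rotate a camera-to-world pose by a random
# axis-angle of `rot_deg` degrees and shift it by `trans_m` meters.

def perturb_pose(T_cw: np.ndarray, rot_deg: float, trans_m: float, rng=np.random) -> np.ndarray:
    axis = rng.normal(size=3)
    axis /= np.linalg.norm(axis)
    angle = np.deg2rad(rot_deg)
    K = np.array([[0.0, -axis[2], axis[1]],
                  [axis[2], 0.0, -axis[0]],
                  [-axis[1], axis[0], 0.0]])
    R_noise = np.eye(3) + np.sin(angle) * K + (1.0 - np.cos(angle)) * (K @ K)  # Rodrigues' formula

    t_noise = rng.normal(size=3)
    t_noise *= trans_m / np.linalg.norm(t_noise)

    T_out = T_cw.copy()
    T_out[:3, :3] = R_noise @ T_cw[:3, :3]
    T_out[:3, 3] += t_noise
    return T_out

# Example: corrupt an identity pose by 15 degrees and 0.3 m, matching the hardest setting above.
noisy_pose = perturb_pose(np.eye(4), rot_deg=15.0, trans_m=0.3)
```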
+Teaser Figure +
+ + +The above figure shows the total loss over 10,000 iterations of training. Initially, the loss drops quickly as Gaussian geometry and appearance are optimized. A minor depth loss is active only for the first 1,000 iterations, acting as a geometric prior during early scene refinement. + +
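The delayed activation of pose refinement referenced here follows the staggered schedule from the Algorithmic Extension section. As a rough outline of how the two optimizers interact (with a stand-in scene tensor and a dummy loss in place of the real differentiable rasterizer, so this is a schematic rather than the project's training code):

```python
import torch

# Schematic of the staggered optimization described in the Algorithmic Extension
# section. The "scene" below is a stand-in tensor and `render_and_loss` is a dummy
# photometric loss; they only show how the two optimizers and the delayed pose
# refinement interact.

num_views = 24
scene = torch.nn.Parameter(torch.randn(1000, 14))                  # stand-in for Gaussian parameters
pose_delta_rot = torch.zeros(num_views, 3, requires_grad=True)     # per-view axis-angle offsets
pose_delta_trans = torch.zeros(num_views, 3, requires_grad=True)   # per-view translation offsets

gaussian_opt = torch.optim.Adam([scene], lr=1e-3)
pose_opt = torch.optim.Adam([pose_delta_rot, pose_delta_trans], lr=3e-3)  # initial lr 0.003

POSE_START_ITER = 1000  # pose refinement activates only after this iteration


def render_and_loss(scene, d_rot, d_trans):
    # Placeholder for differentiable rasterization + photometric loss.
    return (scene.mean() + d_rot.sum() + d_trans.sum()).pow(2)


for it in range(2000):
    loss = render_and_loss(scene, pose_delta_rot, pose_delta_trans)
    loss.backward()
    gaussian_opt.step()
    if it >= POSE_START_ITER:   # before this point the pose deltas stay at zero
        pose_opt.step()
    gaussian_opt.zero_grad()
    pose_opt.zero_grad()
```

Keeping the pose deltas frozen at zero for the first 1,000 iterations mirrors the observation above that early, low-quality geometry would otherwise feed unstable gradients to the poses.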
+Teaser Figure +
+ +Importantly, the above figure shows the evolution of pose error, both rotation (degrees) and translation (meters), over time. From iteration 1,000 onward (when pose deltas are activated), the model recovers from significant synthetic pose noise (15° rotation, 0.3 m translation), converging to under 3.4° and 0.02 m error respectively. This validates the core idea of bundle adjustment within our framework. + +## Qualitative Findings + +BAGS produced high-quality object reconstructions, even in the presence of noisy poses. Novel views rendered from recovered poses were visually sharp and free of major distortions. The model retained real-time inference capabilities and seamlessly integrated with the repair module. + +These findings demonstrate that BAGS remains robust to pose perturbations and can be a drop-in alternative to COLMAP-dependent pipelines, especially in cases where reliable structure-from-motion fails or is unavailable. + +## Conclusion + +This work was inspired by advances in bundle-adjusting Neural Radiance Fields (NeRFs), and aimed to bring similar robustness to the realm of Gaussian Splatting. Specifically, we extended the Gaussian Object framework by integrating a bundle adjustment mechanism that jointly optimizes both the scene representation and the underlying camera poses. + +Our method introduces learnable pose deltas, neural parameters representing camera rotation and translation offsets, which are optimized alongside the Gaussian splats using backpropagation. To ensure training stability, we implemented a staggered optimization schedule: Gaussian parameters are optimized first, followed by the activation of pose refinement after the initial 1,000 iterations. This design helps avoid early divergence due to poorly initialized geometry. + +Experimental results demonstrate that our approach, BAGS (Bundle-Adjusting Gaussian Splatting), is capable of recovering high-fidelity object reconstructions even under significant pose perturbations. Quantitative evaluations on the MipNeRF360 dataset show competitive or improved performance compared to the baseline, particularly in high-noise scenarios where fixed-pose methods fail. Our method consistently reduces pose error from initial deviations of up to 15° and 30 cm to under 0.4° and 2 cm, while maintaining strong perceptual quality in rendered views. + +The implications of this work are broad. Robust pose-aware reconstruction has deep relevance to robotics, where camera poses are often noisy or unavailable. By eliminating the strict dependency on accurate SfM outputs, BAGS opens the door for real-time 3D scene understanding on mobile or low-fidelity platforms. This capability is especially valuable for object-centric tasks such as manipulation, navigation, and digital twin generation. + +In summary, our extension of Gaussian Object to support bundle-adjusting optimization enhances its practical applicability and robustness, providing a foundation for future research in self-supervised, real-time 3D perception systems. + + + + +## Citation + +If you found our work helpful, consider citing us with the following BibTeX reference: + +``` +@article{fonseca2025deeprob, + title = {(Eye) BAGS: Bundle-Adjusting Gaussian Splatting}, + author = {Fonseca, Ruben and Sundar, Sacchin}, + year = {2025} +} +``` + + +## Contact + +If you have any questions, feel free to contact [Ruben Fonseca and Sacchin Sundar](mailto:rubenafc@umich.edu). 
+ diff --git a/reports/DOPE-Plus.md b/reports/DOPE-Plus.md new file mode 100644 index 0000000..dea2fad --- /dev/null +++ b/reports/DOPE-Plus.md @@ -0,0 +1,182 @@ +--- +layout: project +parent: Reports +title: "DOPE-Plus: Enhancements in Feature Extraction and Data Generation for 6D Pose Estimation" +description: Group 6 final project report for DeepRob at the University of Michigan. +authors: + - name: Jeffrey Chen + social: "https://www.linkedin.com/in/zeyu-jeffrey-chen/" + affiliation: University of Michigan + - name: Yuqiao Luo + affiliation: University of Michigan + - name: Longzhen Yuan + affiliation: University of Michigan +--- + + + +
+Teaser Figure +
+ + + + + +## Abstract + +This study explored enhancements to the Deep Object Pose Estimation framework [(DOPE, Tremblay et al., 2018)](https://arxiv.org/abs/1809.10790){: target="_blank" rel="noopener noreferrer"} by improving both its network architecture and synthetic data generation pipeline for 6D object pose estimation. We proposed replacing the original VGG-19-based feature extractor with a Vision Transformer (ViT), aiming to leverage its superior representation capabilities. In parallel, we developed a refined data generation pipeline, resulting in an augmented [HOPE dataset (Lin et al., 2021)](https://arxiv.org/abs/2103.13539){: target="_blank" rel="noopener noreferrer"} and a new fully synthetic dataset of a customized object, Block. These datasets were used to train and evaluate our modified DOPE model on two target objects: Cookies and Block. Experimental results demonstrate that incorporating ViT improves pose estimation performance over the original VGG-19 backbone, suggesting the potential for further advancements through the integration of more powerful feature extractors. + +## Introduction and Background + +As robotics continues to advance, researchers are increasingly exploring ways to equip robots with the capabilities needed to perform everyday tasks. Many of these tasks require fundamental operations such as object fetching, which depend on accurate pose estimation of target objects. This study investigated the DOPE (Deep Object Pose Estimation) framework proposed by J. Tremblay et al. in 2018 and further extended its feature extraction and data generation pipelines. The original DOPE framework employed VGG-19 as the feature extractor. In our work, we replaced it with a Vision Transformer (ViT), motivated by its superior feature extraction capabilities, particularly in capturing relationships between multiple objects. Meanwhile, we enhanced the original DOPE data synthesis pipeline to augment and generate two new datasets for network training. Our goal is to improve the accuracy of 6D object pose estimation and to validate the effectiveness of our enhancements for object perception in real-world scenarios. + +### DOPE + +DOPE (Deep Object Pose Estimation) is a one-shot, instance-based, deep-neural-network-based system designed to estimate the 3D poses of known objects in cluttered scenes from a single RGB image, in near real time and without the need for post-alignment. The DOPE network is a convolutional deep neural network that detects objects' 3D keypoints using a multi-stage architecture. + +First, the image features are extracted by the first ten layers of the VGG-19 convolutional neural network (with pre-trained parameters). Then two 3 × 3 convolutional layers are applied to the features to reduce the feature dimension from 512 to 128. + +Second, these 128-dimensional features are fed into the first stage, which consists of three 3 × 3 × 128 convolutional layers and one 1 × 1 × 512 layer, followed by a 1 × 1 × 9 layer to produce belief maps and a 1 × 1 × 16 layer to produce vector fields. + +There are 9 belief maps: 8 of them are for the projected vertices of the 3D object and one is for its centroid. The vector fields indicate the direction from each vertex to its corresponding centroid, which is used to construct the bounding boxes of objects after detection. + +### Data Generation + +Because a large amount of data is required to train a high-performing deep network, it can be difficult to gather enough data for training. 
In addition, unlike 2D labeling, making 3D pose labels manually is much more difficult. DOPE proposed a method to generate data, which allows researchers to gather enough data rapidly and greatly alleviates the workload of manual labeling. + +The overall data synthesis strategy is to generate two kinds of datasets: "domain randomized (DR)" and "photorealistic (photo)". The domain randomized data are generated by putting the target object into a virtual environment, which is composed of different distractor objects and a random background. The objects shown in DR images do not necessarily obey physical principles. Photorealistic data are generated by putting target objects into 3D backgrounds with physical constraints. In other words, they are impacted by the effects of gravity and collision. + + +## Algorithmic Extension + +### Network Architecture + +One of our algorithmic extensions is that we replaced the original VGG-19 feature extractor network with ViT, because of ViT's larger receptive field and its ability to reason about the global scene rather than focus on a local area. To make this change, many parts of the original model backbone need to be modified. We created a pre-trained ViT feature extractor using the timm library. It accepts images of dimension 244 × 244 with a patch size of 16 × 16; as a result, interpolation is needed to make sure the input data has a size of 244 × 244. We then take only the output of ViT's final layer. At the next stage, two convolutional layers are employed to reduce the number of channels to 128 so that the dimension matches the following network structure (the belief map stages). + +### Data Generation + +We enhanced the original data generation pipeline using BlenderProc to produce two distinct synthetic RGB datasets, each corresponding to a specific target object: Cookies and Block. The Cookies object is part of the publicly available HOPE dataset, while the Block is a newly introduced, custom-designed object. Our pipeline incorporates randomized camera poses, object poses, and 360-degree HDRI backgrounds, while ensuring that these variations remain physically reasonable. These improvements aim to create a more diverse and robust synthetic dataset, helping to mitigate the common sim-to-real domain gap in deep learning applications. The enhanced pipeline consists of four main stages: (1) textured 3D CAD modeling, (2) real-world HDRI background generation, (3) image synthesis, and (4) ground truth annotation pre-processing. + + (1) Textured 3D CAD Modeling and Real-World Background Generation + + To obtain a precise 3D textured model of the customized object, we first used SolidWorks to create an accurate geometric model with correct dimensions. Blender was then employed to add textures and enrich visual details, including colors and physical material properties, as shown below. +
+Teaser Figure +
+

3D Textured Model

+ For real-world HDRI background generation, we captured raw 360-degree images of the desired physical environments using the Insta360 X3 camera. These images were subsequently pre-processed and converted into HDRI backgrounds using Adobe Photoshop, as illustrated below. +
+Teaser Figure +
+

Sampled HDRI Background

+ + (2) Image Synthesis + + With all necessary elements prepared, we proceeded to the image synthesis stage. We developed a Python script to randomize the poses of cameras, target objects, and distractors. To emulate typical indoor scenarios encountered in onboard SLAM and manipulation tasks, we assumed that both the camera and the target object remained upright, with randomized yaw angles and small perturbations in pitch and roll. In contrast, distractor objects were randomized with full degrees of freedom as a form of data augmentation, without adhering to physical stability constraints. + + (3) Ground Truth Annotation Pre-Processing + + With the existing pipeline provided by the original paper, ground truth annotations for each frame were automatically generated. However, when constructing a comprehensive dataset for training and validation, it was necessary to combine synthetic and real images from various sources. In this case, the annotation files (e.g., JSON files) often differed in format and configuration. To streamline data preparation and ensure compatibility with downstream tasks, we developed an additional Python script to pre-process and standardize the ground truth annotations. + + + +## Innovative Enhanced Datasets + +We augmented the original HOPE dataset and created a new dataset for the customized Block object by generating synthetic domain-randomized (DR) images, referred to as HOPE-Syn&Real and the Synthetic Block Dataset, respectively. + + (1) HOPE Data Augmentation (HOPE-Syn&Real Dataset) + + We generated additional synthetic data based on the HOPE dataset. The original dataset consists of 28 grocery items, with approximately 300 real images per object. We selected Cookies as the target object for subsequent training tasks. To enrich the existing dataset, we synthesized an additional 12,000 domain-randomized (DR) images of this object using the enhanced data generation pipeline described above, and combined them with the existing real images to form the HOPE-Syn&Real dataset. To verify the quality of the synthesized images, we employed a validation method adapted from the original codebase to visualize the ground truth annotations, as shown below. 
+ Teaser Figure + Teaser Figure +
+ +
+ Teaser Figure + Teaser Figure +
+

Sampled Generated Data and Visualized Ground Truth in the HOPE-Syn&Real Dataset. (Left column: generated RGB images, Right column: visualized ground truths)

+ + (2)Synthetic Block Dataset + + In addition to augmenting the HOPE dataset, we created a fully synthetic dataset for our customized Block object using the aforementioned methods and strategies. This dataset consists of over 19,300 domain-randomized images, with random variations in block poses, instance counts, backgrounds, and distractor objects. Furthermore, as shown below, lighting conditions and shadows were simulated and rendered to further enhance realism and dataset diversity. +
+Teaser Figure +
+

Synthetic Domain Randomized Image in the Synthetic Block Dataset

+ + + + + + +## Results + +To quantify and compare model performance, we trained four models for Cookies and Block in total: one original DOPE and one ViT-DOPE model for each object. The HOPE-Syn&Real Dataset and the Synthetic Block Dataset were used to train the Cookies and Block models, respectively. Each dataset was split into training and validation subsets, where the validation sets contained around 5%–7% of the total images. Neither dataset contains photorealistic images, due to project deadline constraints and the lack of open-sourced data generation scripts in the original DOPE codebase. Hence, the Cookies models were trained with DR and real images, while the Block models were trained only on DR data. + +The statistical results for the "Cookies" object are shown below: +
+ Loss + Accuracy + Accuracy +
+

Loss, Accuracy and mAP Values of "Cookies" Object Training

+ + +The statistic results of object "Block" are shown below: +
+ Loss + Accuracy + Accuracy +
+

Loss, Accuracy and mAP Values of "Block" Object Training

+ +An example of model belief map prediction is shown below: +
+Teaser Figure +
+

Model Inference Example

+ +An example of our model inference to predict the object's bounding box is shown below: +
+Teaser Figure +
+

Model Inference Example

+ + + +## Citation + +If you found our work helpful, consider citing us with the following BibTeX reference: + +``` +@article{jeffrey2025deeprob, + title = {DOPE-Plus: Enhancements in Feature Extraction and Data Generation for 6D Pose Estimation}, + author = {Chen, Jeffrey and Luo, Yuqiao and Yuan, Longzhen}, + year = {2025} +} +``` + + +## Contact + +If you have any questions, feel free to contact [Jeffrey Chen, Yuqiao Luo and Longzhen Yuan](mailto:jeffzc@umich.edu?cc=joeluo@umich.edu?cc=longzhen@umich.edu). + diff --git a/reports/FETCH-GRASP.md b/reports/FETCH-GRASP.md new file mode 100644 index 0000000..a59a876 --- /dev/null +++ b/reports/FETCH-GRASP.md @@ -0,0 +1,233 @@ +--- +layout: project +parent: Reports +title: Learning-Based Segmentation & Grasping on FETCH with PointNet++ & AnyGrasp +description: Grasping opaque and transparent objects in real-world settings is hindered by noisy and incomplete point clouds. We present a pipeline on FETCH that uses Open3D for filtering, PointNet++ for learned segmentation, and AnyGrasp with MoveIt for accurate grasp prediction and execution. +authors: + - name: Marzuk Kalluparamban + social: "http://linkedin.com/in/marzukkp/" + affiliation: University of Michigan + - name: Hemanth Murali + social: "https://hemanthmurali.me" + affiliation: University of Michigan + - name: Maithreyan Ganesh + social: "https://www.linkedin.com/in/maithreyan-ganesh-5b248a207/" + affiliation: University of Michigan + - name: Jyotishka Dutta Gupta + social: "https://www.linkedin.com/in/jyotishka-duttagupta/" + affiliation: University of Michigan +--- + + + + + +
+Teaser Figure +
+ + + + +## Abstract + +We demonstrate a real-world grasping pipeline that uses RGB-D data from a Kinect sensor installed on the FETCH robot to grasp opaque and partially transparent objects. We use a preprocessing pipeline that combines planar surface extraction using RANSAC, clustering with DBSCAN, and Open3D-based NaN removal to address noise, missing depth, and environmental clutter. Target isolation and object segmentation are performed using a PointNet++ model trained on augmented YCB point clouds. AnyGrasp is used to produce grasp candidates, which are then assessed using MoveIt’s inverse kinematics and collision checking framework. We compare the system’s performance on rigid and transparent objects in both isolated and cluttered scenarios. Grasp success rate and collision rate are used as key metrics, with additional testing on colorless and colored transparent objects. + +## Introduction + +Robotic grasping in unstructured environments remains a significant challenge due to perception uncertainty, object diversity, and planning constraints. In this project, we develop a real-world grasping pipeline for the FETCH mobile manipulator using RGB-D data from a Kinect sensor. The system integrates motion planning with MoveIt, grasp candidate generation with AnyGrasp, object segmentation with PointNet++, and point cloud preprocessing through Open3D filtering, RANSAC, and DBSCAN. The focus is on the implementation, deployment, and validation of the system on real hardware. Performance is evaluated on both rigid and transparent objects in isolated and cluttered settings, highlighting the strengths and limitations of the approach in practical robotic manipulation tasks. + + +## Methods + +### Robust Grasp Perception: AnyGrasp + +AnyGrasp [(Fang et al., 2022)](https://arxiv.org/abs/2212.08333){: target="_blank" rel="noopener noreferrer"} provides dense and temporally consistent 7-DoF grasp pose predictions from partial point clouds. The system consists of a Geometry Processing Module, which samples and predicts stable grasp candidates using point-wise features, and a Temporal Association Module, which tracks grasps across frames for dynamic scenes using feature-based matching. Trained on real-world GraspNet-1Billion data with randomized point dropout, AnyGrasp achieves a 93.3% success rate in bin-picking tasks involving unseen objects. + +
+ AnyGrasp Pipeline +
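AnyGrasp consumes the filtered, table-free object cloud produced by the preprocessing stage described in the introduction (NaN removal, RANSAC plane extraction, and DBSCAN clustering). A minimal Open3D sketch of that filtering is shown below; the distance threshold, DBSCAN parameters, and the synthetic test cloud are illustrative assumptions rather than the values tuned on the FETCH robot.

```python
import numpy as np
import open3d as o3d

# Rough sketch of the point cloud preprocessing described in the introduction.
# Thresholds are illustrative, not the values tuned on the robot.

def preprocess_cloud(points: np.ndarray) -> list:
    pcd = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(points))
    pcd = pcd.remove_non_finite_points()                       # drop NaN / inf depth returns

    # Remove the dominant planar surface (e.g., the table) with RANSAC.
    _, plane_idx = pcd.segment_plane(distance_threshold=0.01,
                                     ransac_n=3, num_iterations=1000)
    objects = pcd.select_by_index(plane_idx, invert=True)

    # Group the remaining points into object candidates with DBSCAN.
    labels = np.array(objects.cluster_dbscan(eps=0.02, min_points=20))
    return [objects.select_by_index(np.where(labels == k)[0].tolist())
            for k in range(labels.max() + 1)]

# Example with a synthetic cloud: a flat plane plus a small blob resting above it.
plane = np.c_[np.random.rand(2000, 2), np.zeros(2000)]
blob = np.random.rand(300, 3) * 0.05 + np.array([0.5, 0.5, 0.1])
clusters = preprocess_cloud(np.vstack([plane, blob]))
```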
+ +### Transparent Object Depth Completion: TransCG + +To address the challenge of incomplete depth perception for transparent objects, TransCG [(Fang et al., 2022)](https://arxiv.org/abs/2202.08471){: target="_blank" rel="noopener noreferrer"} introduces a real-world dataset and an efficient depth completion model, DFNet. DFNet refines noisy RGB-D inputs using dense blocks and a dual-loss strategy focusing on depth and surface normals. Trained on 57,715 images with augmentation, DFNet outperforms prior methods like ClearGrasp and demonstrates real-time performance, improving grasping reliability for transparent and translucent objects. + +
+ TransCG Pipeline +
+ +## Results + +### Effect of Object Rigidity + +Grasping performance differs notably between rigid and deformable objects. Rigid objects tend to yield higher success rates and lower collision rates, likely due to their stable structural properties that are better aligned with traditional robotic grasping strategies. In contrast, deformable objects introduce complexities such as unpredictable shape changes and instability during grasping, often resulting in increased accidental contacts and reduced reliability. These challenges highlight the limitations of current grasping systems, which historically have been tuned for rigid environments. Advancements in perception, modeling, and adaptive control will be essential to more effectively handle the nuances of deformable object manipulation moving forward. + +#### Table I + + + + + + + + + + + + + + + + + + + + +
| Object Type | Success Rate | Collision Rate |
| --- | --- | --- |
| Rigid | 70% | 15% |
| Deformable | 67% | 27% |
+ +
+ Rigid Grasp 1 + Rigid Grasp 2 +
+ +### Effect of Environment Clutter + +Grasping performance varies significantly between isolated and cluttered environments. In isolated settings, success rates are notably higher, reflecting the reduced complexity and minimal interference from surrounding objects. In contrast, cluttered environments introduce challenges such as the need to navigate around neighboring objects, resulting in lower grasp success rates and the occurrence of collisions. The close proximity of multiple objects, combined with irregular surfaces, especially when deformable objects are involved, complicates grasp planning and reduces overall stability. These observations reinforce the necessity for more robust grasping strategies capable of dynamically adapting to cluttered and unstructured scenes, an essential consideration for advancing practical robotic manipulation systems. + +#### Table II + 
| Scene Type | Success Rate | Collision Rate |
| --- | --- | --- |
| Isolated | 75% | 0% |
| Cluttered | 70% | Clutter collisions |
+ +
+ Transparent Grasp 1 + Transparent Grasp 2 +
+ +### Effect of Object Transparency + +Grasping transparent objects presents substantial challenges compared to opaque ones. Performance metrics reveal that success rates are considerably lower for transparent objects, primarily due to limitations in depth sensing and perception. Despite the application of depth completion techniques, incomplete or noisy depth data frequently undermines the stability of initial grasp attempts. Interestingly, while grasp failures were common, collision rates remained low, suggesting that the primary difficulty lies not in avoiding obstacles but in establishing reliable grasp points. These results emphasize a critical gap in current robotic perception systems and highlight the pressing need for more advanced methods tailored specifically to handling transparent materials in complex environments. + + +#### Table III + + + + + + + + + + + + + + + + + + + + +
| Object Material | Success Rate | Collision Rate |
| --- | --- | --- |
| Opaque | 70% | 15% |
| Transparent | 30% | 0% |
+ +
+ Deformable Grasp 1 + Deformable Grasp 2 +
+ + + +## Project Video + +We demonstrate a real-world robotic grasping pipeline using FETCH, combining RGB-D sensing, PointNet++-based segmentation, and AnyGrasp for 6-DoF grasp prediction. Our system handles both opaque and partially transparent objects in isolated and cluttered environments. Below, we showcase the project demo of our project. + +
+
+ +
+
+ + +## Conclusion + +In this project, we developed a modular robotic grasping system on the FETCH platform, capable of handling rigid, deformable, and transparent objects using RGB-D sensing, depth completion, PointNet++ segmentation, and AnyGrasp. While the system achieved strong performance on rigid and isolated objects, challenges remain in cluttered and transparent scenarios due to incomplete depth data and planning limitations. Future work will focus on enhancing transparent object perception, incorporating closed-loop feedback, and improving grasp planning to better approach human-level manipulation performance. + + + +## Citation + +If you found our work helpful, consider citing us with the following BibTeX reference: + +``` +@article{marzuk-umich2025sdeeprob, + title = {Learning-Based Segmentation & Grasping on FETCH with PointNet++ & AnyGrasp}, + author = {Marzuk Kalluparamban, Hemanth Murali, Maithreyan Ganesh, and Jyotishka Dutta Gupta}, + affiliation = {University of Michigan}, + year = {2025} +} +``` +Be sure to update this reference to include your team's author information for correct attribution! + + +## Contact + +If you have any questions, feel free to contact [Marzuk Kalluparamban and Hemanth Murali](mailto:marzukkp@umich.edu?cc=hems@umich.edu). + diff --git a/reports/acceleration.md b/reports/acceleration.md new file mode 100644 index 0000000..8276d3a --- /dev/null +++ b/reports/acceleration.md @@ -0,0 +1,131 @@ +--- +layout: project +parent: Reports +title: Convergence Acceleration For DiffusionDet:On PROPS Dataset +description: This is a final project report for DeepRob at the University of Michigan. +authors: + - name: Gongxing Yu + #social: "https://topipari.com" + affiliation: University of Michigan + - name: Liangkun Sun + #social: "https://xiaoxiaodu.net/" + affiliation: University of Michigan + - name: Yang Lyu + affiliation: University of Michigan + # - name: Yifu Lu + # affiliation: University of Michigan + # - name: Dalton Richardson + # affiliation: University of Michigan + # - name: Odest Chadwicke Jenkins + # social: "https://ocj.name/" + # affiliation: University of Michigan +--- + + + +
+Teaser Figure +
+ + + + + +## Abstract + +DiffusionDet (diffusion-based detection) is a new object detection model that learns a denoising process mapping ground truth boxes corrupted with Gaussian noise back to the original ground truth boxes. +The authors of DiffusionDet trained and tested their model on +the large MS-COCO dataset. To test and enhance DiffusionDet's +robustness, we trained this model on the much smaller PROPS +dataset. As we observed that the convergence rate and detection +accuracy were poor, we proposed a new heatmap detection +head and introduced a heatmap loss as a complement to the +model's intrinsic loss. The result shows that the optimized model +converges much faster and outperforms the original DiffusionDet +by 1.8 AP on the PROPS dataset. + +## Background Introduction +Traditional object detectors use fixed queries or predefined boxes refined via classification and regression. DiffusionDet, inspired by image denoising, introduces a "noise-to-box" approach: Gaussian noise is added to ground truth boxes during training, and the noisy boxes are treated as ROIs on feature maps. A decoder then learns to recover the original boxes. At inference, the model denoises random boxes to produce final detections, offering a novel method for object localization. +
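To make the "noise-to-box" idea concrete, the forward corruption step can be sketched as follows. This is a generic DDPM-style signal-scaling formulation written for illustration; the box normalization, scaling, and noise schedule shown here are assumptions and not DiffusionDet's exact implementation.

```python
import torch

# Sketch of the "noise-to-box" forward corruption described above: ground truth
# boxes are normalized and mixed with Gaussian noise at a sampled timestep t,
# following a standard DDPM-style schedule (illustrative values only).

T = 1000
betas = torch.linspace(1e-4, 0.02, T)
alpha_bars = torch.cumprod(1.0 - betas, dim=0)     # cumulative signal-retention terms

def corrupt_boxes(gt_boxes_cxcywh: torch.Tensor, t: int) -> torch.Tensor:
    # gt_boxes: (N, 4) in normalized cx, cy, w, h coordinates, values in [0, 1]
    x0 = gt_boxes_cxcywh * 2.0 - 1.0               # map to roughly [-1, 1]
    noise = torch.randn_like(x0)
    a = alpha_bars[t]
    xt = a.sqrt() * x0 + (1.0 - a).sqrt() * noise  # noisy "boxes" used as ROIs
    return (xt.clamp(-1.0, 1.0) + 1.0) / 2.0       # back to [0, 1] for ROI pooling

# Example: corrupt two ground-truth boxes at a middle timestep.
boxes = torch.tensor([[0.5, 0.5, 0.2, 0.3], [0.3, 0.6, 0.1, 0.1]])
noisy_rois = corrupt_boxes(boxes, t=500)
```

At training time the detection head sees these noisy ROIs and is supervised to recover the original boxes; at inference the same head starts from purely random boxes.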
+Teaser Figure +
+ +## Heatmap Head +To enhance DiffusionDet's convergence speed and improve detection performance on small or occluded objects, we propose a heatmap head as an auxiliary supervision module. The heatmap head consists of two convolution layers with a ReLU activation in between. It maps high-dimensional backbone features to class-specific heatmaps. + +During training, we synthesize ground truth heatmaps using a 2D Gaussian kernel centered at each object's location: +
+Teaser Figure +
+For each point (x, y) on the heatmap, we define a Gaussian response: +
+Teaser Figure +
+This loss is weighted and added to the total detection loss. The heatmap serves as a spatial attention signal, allowing the model to better localize small or occluded objects. +
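As a rough PyTorch sketch of the head and target construction described above (channel counts, the Gaussian width, the MSE choice, and the loss weight are illustrative assumptions, not the exact configuration used in our experiments):

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

# Minimal sketch of the two-convolution heatmap head and the Gaussian target
# described above. Channel counts, sigma, and the loss weight are illustrative.

class HeatmapHead(nn.Module):
    def __init__(self, in_channels=256, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, 128, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(128, num_classes, kernel_size=1)

    def forward(self, feats):
        # feats: backbone feature map (B, C, H, W) -> class-specific heatmaps
        return self.conv2(F.relu(self.conv1(feats)))


def gaussian_target(height, width, center_xy, sigma=2.0):
    # Render a 2D Gaussian response centered at an object's (x, y) location.
    ys = torch.arange(height).view(-1, 1).float()
    xs = torch.arange(width).view(1, -1).float()
    cx, cy = center_xy
    return torch.exp(-((xs - cx) ** 2 + (ys - cy) ** 2) / (2 * sigma ** 2))


# Example: auxiliary heatmap loss, weighted and added to the detection loss.
feats = torch.randn(2, 256, 64, 64)
pred = HeatmapHead()(feats)                        # (2, num_classes, 64, 64)
target = gaussian_target(64, 64, center_xy=(20, 30))
heatmap_loss = F.mse_loss(pred[:, 0], target.expand(2, -1, -1))
total_loss_example = 1.0 * heatmap_loss            # added to the intrinsic detection loss
```

In practice a focal-style penalty is often preferred over plain MSE for sparse heatmaps; the sketch only illustrates where the auxiliary term enters the total loss.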
+Teaser Figure +
+ +## Results + +
+Teaser Figure +
+This figure provides qualitative results on sample images +from the PROPS dataset, highlighting the heatmap-enhanced +model’s capability to accurately localize objects with high +confidence, including challenging small and partially occluded +targets. The optimized DiffusionDet model enhanced by the +heatmap layer demonstrates precise localization and exception- +ally high detection confidence across various object categories. +Notably, the model achieved over 95% confidence scores for +all detected instances, illustrating the robustness and accuracy +of our proposed heatmap guidance. +
+Teaser Figure +
+ +This figure compares the Average Precision (AP) over the course +of training. The baseline using the Swin Transformer backbone +with 30 proposals demonstrated a low initial convergence +rate. Upon transitioning to the ResNet-50 backbone with an +increased number of proposals, the AP improved considerably. +Incorporating the heatmap head led to a substantial +acceleration in convergence, especially evident in the early training +stages, where AP sharply rose from approximately 38% to +58% within the first 1,000 iterations. After 15,000 iterations, +the heatmap-enhanced model achieved a final AP around 67%, +surpassing the baseline approaches by approximately 10%. +
+Teaser Figure +From the result we can see that, when there is no heatmap in the model, 70 proposals give better results (higher AP and lower loss). After adding the heatmap to the model, we get the highest AP and the lowest loss in a shorter time. + + + + + + +## Citation + +If you found our work helpful, consider citing us with the following BibTeX reference: + +``` +@article{lyu2025deeprob, + title = {Convergence Acceleration for DiffusionDet: On PROPS Dataset}, + author = {Yang Lyu and Gongxing Yu and Liangkun Sun}, + year = {2025} +} +``` + +## Contact + +If you have any questions, feel free to contact [Yang Lyu](mailto:lyuyang@umich.edu). + diff --git a/reports/aura.md new file mode 100644 index 0000000..8e88717 --- /dev/null +++ b/reports/aura.md @@ -0,0 +1,101 @@ +--- +layout: project +parent: Reports +title: Human Aware Motion Planning for 6 DoF Robot Arm in Painting Application +description: This is a final project report for DeepRob at the University of Michigan. +authors: + - name: Eric Chen + affiliation: University of Michigan + year: 3rd Year Robotics B.S.E + email: erche@umich.edu + + - name: Emily Wu + affiliation: University of Michigan + year: Robotics M.S. + email: emilyywu@umich.edu +--- + + + +## Abstract + +Collaborative robots must safely and efficiently operate alongside humans, especially in dynamic and creative environments such as artistic painting. In this work, we develop a human-aware motion planning system for a 6-DoF robot arm collaborating with a human artist. Using real-time 3D human pose estimation from OpenPose combined with depth sensing, we capture the artist’s motion and predict future poses with the lightweight siMLPe network. Predicted human poses are then converted into obstacle representations for real-time collision-aware trajectory planning using MoveIt2. Our system integrates multiple open-source platforms including ROS2, Gazebo, and multiple learning models to enable the robot to adaptively paint around human collaborators. Results show a functional pipeline demonstrating a strong proof-of-concept for human-aware collaboration in creative tasks. Future work includes improving prediction robustness, increasing sensor fidelity, and collecting artist-specific datasets for model refinement. For more in-depth information, please check out our technical report linked above. + +## Background + +This project was completed for the Deep Learning for 3D Robot Perception class at the University of Michigan in the winter of 2025. The motion planner developed was an exploration into human-aware planning for a collaborative robot arm for the AURA project in the Robot Studio Lab + under Professor Patricia Alves-Oliveira. The AURA project aims to explore what authenticity of the production process of an artwork means, using biometric data, to better understand Generative AI's impact on the artist community. +
+
+ +
+
+ +## System Diagram + +
+System Diagram +
+ +In our system, we are capturing the artist's movements using an Intel Realsense D455 camera. Using OpenPose to track the human joint positions and querying the depth data at those joint positions, we obtain the human poses over the past 25 timesteps. These are then sent to our pre-trained siMLPe model to predict the poses for the next 10 timesteps. The predicted poses are then reduced further: we take the torso and arm joints to create cylinders or other primitive shapes that represent a safe area around the human for the robot to plan around. These obstacles are sent to MoveIt, which simulates the obstacles and robot arm to create safe trajectories. The goal positions for the robot are given by the CoFRIDA node, which plans strokes for the robot to paint using Generative AI. Once a trajectory is found, the robot can execute it in the real world. + +## Challenges +Some of the challenges we ran into were limited hardware, lack of documentation, inconsistent human skeleton formats, and lack of robustness of networks. For the depth camera, we initially tested out the ZED and the PrimeSense depth cameras. The ZED was promising; however, it ended up being deprecated and lacked the human tracking SDK capability, so we turned to the Intel Realsense D455 camera. This camera had fewer SDK capabilities, which led us to add OpenPose in the first place. + +A challenge related to the datasets used in training was the lack of documentation of the H3.6M keypoint joint labels. This made it more difficult to extract the key body parts we wanted to translate into obstacles. It was also difficult to map the OpenPose skeleton onto the expected siMLPe skeleton structure. OpenPose was formatted with 25 joints, while siMLPe trains on 22 joints but outputs 32 joints. This indirect translation led to uncertainty in the siMLPe output. There was also a lack of robustness to undetected keypoint joints. When OpenPose loses tracking, it will return (0, 0) for the 2D location of that joint. Directly sending this data to siMLPe results in inaccurate predictions. + +## Results +We visualize the detected 2D joint keypoints from OpenPose overlaid on the live RGB camera feed in the figure below. The right side of each image shows the extracted skeleton structure in 2D space. This mapping provides the foundation for accurate 3D human pose reconstruction in our pipeline when combined with depth information. +
+Teaser Figure +
+ +We show the predicted future joint trajectories from the siMLPe network over several frames. Each plot visualizes the progression of joint movements based on past observed poses, demonstrating the model’s ability to anticipate natural human motion with minimal latency. + +
+Teaser Figure +
+ +The predicted human motion from siMLPe was used to generate virtual obstacles around the human body. These obstacles are placed into the motion planning environment in Gazebo, allowing the robot arm to plan and execute collision-free trajectories in real time alongside a moving human collaborator. + +In this work, we showed a strong proof-of-concept for a motion planner using predicted human motion. We were able to apply siMLPe in an online robotic system through the integration and connection of multiple disjoint open-source software packages. +
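As an illustration of the joint-to-obstacle step, the sketch below turns one predicted limb segment into a padded cylinder. The joint names, padding radius, and dictionary output are our own placeholders; the actual system publishes MoveIt collision objects rather than this structure.

```python
import numpy as np

# Illustrative sketch of how a predicted arm segment can be turned into a
# cylindrical "keep-out" obstacle for the planner (placeholder names and values).

def segment_to_cylinder(joint_a: np.ndarray, joint_b: np.ndarray, radius: float = 0.15) -> dict:
    axis = joint_b - joint_a
    length = float(np.linalg.norm(axis))
    center = (joint_a + joint_b) / 2.0
    return {
        "center": center,                  # cylinder midpoint in the camera/world frame
        "axis": axis / max(length, 1e-6),  # unit direction from joint_a to joint_b
        "length": length + 2 * radius,     # pad the ends so the wrist/shoulder is covered
        "radius": radius,                  # safety margin around the limb
    }

# Example: one predicted future pose (3D joints in meters) -> forearm obstacle.
predicted_pose = {"elbow": np.array([0.4, 0.1, 1.1]), "wrist": np.array([0.6, 0.0, 1.0])}
forearm_obstacle = segment_to_cylinder(predicted_pose["elbow"], predicted_pose["wrist"])
```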
+Teaser Figure +
+ +## Future Work + +In this work, we assumed that an artist's movements are not any different from collected data of human movements, such as walking, talking on the phone, or other everyday activities in the 3.6M Human Dataset. Some future work could include creating a dataset of artist movements while painting using motion capture. Modifying siMLPe’s architecture or retraining it for artist motions could also be a way to improve our model for artistic applications. For example, siMLPe does not have any nonlinear activation functions, so attempting to add back some of the complexity of human tracking could improve the model. + +Integrating better depth cameras or body tracking modules other than OpenPose could also help our predictions. For measuring the state of a person's joints, the most ideal setup would be using motion capture, which would definitely be applicable to an artist. While OpenPose provided us with accurate and real-time joint measurements, it sometimes found human skeletons in random objects even when no humans were in frame and was somewhat noisy. Validation of predicted pose data from siMLPe against motion capture would also be an important next step to both improving the model and assuring the results we are getting are accurate. + +Improving online obstacle generation would also improve this project. Our current simulated environment runs slowly due to the delay in sending obstacles to Gazebo. This would only decrease in performance if we increased the fidelity of the human, so it would be necessary to improve our performance for the best online results. Also, adding rotation of body links using quaternions from the pose data would greatly improve the motion planning of the robot, as the simulation is more accurate to real life. + +Finally, training the network to work even with missing joints or faulty sensor measurements would improve the robustness of the system. To do this, we could record OpenPose data and use it for training siMLPe, as well as change the expected joint input format and number of joints to train on. Retraining the network would likely provide a large increase in accuracy of our predictions, especially since the data is collected using the sensors on our real system. + +## Acknowledgement + +We would like to thank the Deep Rob course and staff for providing us with the knowledge of different deep learning architectures. We also want to thank the Robot Studio Lab for providing us with the resources and project to complete the work. + +## Citation + +If you found our work helpful, consider citing us with the following BibTeX reference: + +``` +@article{chen_wu_2025deeprob, + title = {Human Aware Motion Planning for 6 DoF Robot Arm in Painting Application}, + author = {Chen, Eric and Wu, Emily}, + year = {2025} +} +``` + + +## Contact + +If you have any questions, feel free to contact [Eric Chen](mailto:erche@umich.edu) or [Emily Wu](mailto:emilyywu@umich.edu). diff --git a/reports/deweathered.md b/reports/deweathered.md new file mode 100644 index 0000000..66e2153 --- /dev/null +++ b/reports/deweathered.md @@ -0,0 +1,53 @@ +--- +layout: project +parent: Reports +title: Improving the Robustness of Object Detection Under Hazardous Conditions +description: This is a final project report for DeepRob at the University of Michigan. 
+authors: + - name: Rui Li + affiliation: University of Michigan + - name: Yuqing Luo + affiliation: University of Michigan + - name: Inbum Park + affiliation: University of Michigan + - name: Ziyang Xuan + affiliation: University of Michigan +--- + + + +
+Teaser Figure +
+ + + + + + +## Abstract + +In this project, we evaluated the baseline performance of DETR [(Carion et al., 2020)](https://arxiv.org/abs/2005.12872){: target="_blank" rel="noopener noreferrer"}, a transformer-based object detector, under adverse weather conditions using the DAWN dataset. We improved detection robustness by fine-tuning both the CNN backbone and the transformer decoder, allowing the model to better handle low-visibility scenarios like rain, snow, fog, and sandstorms. Data augmentation techniques were also applied to simulate hazardous conditions and enhance generalization. Our results show that fine-tuning on domain-specific datasets improves performance, though challenges remain when dealing with diverse and severe weather. Future work will focus on integrating de-weathering modules to further boost detection reliability. + + +## Results + +We compare the baseline model with two versions: a finetuned model ("Finetuned") and a deweathered model ("De-weathered"). Among the eight weather types in the DAWN dataset [(Kenk and Hassaballah, 2020)](https://arxiv.org/abs/2008.05402){: target="_blank" rel="noopener noreferrer"}, 'dusttornado' is excluded due to having only two duplicate samples. Results are reported for six weather types (rain storm, sand storm, haze, mist, foggy, and snow storm) with sample counts ranging from 7 to 28. Evaluation follows the baseline metrics, using six Average Precision (AP) and six Average Recall (AR) scores. AP and AR are further broken down by object size (small, medium, large) and by maximum detections allowed (1, 10, 100). The IoU=0.5:0.95 metric represents an average over multiple IoU thresholds. Detailed result comparisons for each test case can be found in the report. We also used the finetuned model on a bad-weather driving video, as shown below. + +## Project Video + +
+
+ +
+
+ + + +## Contact + +If you have any questions, feel free to contact [Inbum Park](mailto:ibpark@umich.edu). diff --git a/reports/example.md b/reports/example.md index 3150b97..19e0068 100644 --- a/reports/example.md +++ b/reports/example.md @@ -7,15 +7,15 @@ authors: - name: Anthony Opipari social: "https://topipari.com" affiliation: University of Michigan - - name: Huijie Zhang - social: "https://www.huijiezh.com/" + - name: Xiaoxiao Du + social: "https://xiaoxiaodu.net/" affiliation: University of Michigan - - name: Jiyue Zhu - social: "https://jiyuezh.github.io/" + - name: Edmond Tong + affiliation: University of Michigan + - name: Yifu Lu + affiliation: University of Michigan + - name: Dalton Richardson affiliation: University of Michigan - - name: Karthik Desingh - social: "https://karthikdesingh.com/" - affiliation: University of Minnesota - name: Odest Chadwicke Jenkins social: "https://ocj.name/" affiliation: University of Michigan @@ -24,7 +24,13 @@ authors:
-Teaser Figure +Teaser Figure +
+ + + @@ -37,7 +43,7 @@ This course covers the necessary background of neural-network-based deep learnin Visual results are great for project webpages; exciting results can captivate an audience and convey dense information efficiently. We suggest including images, figures, animations, and videos on your webpage. For example, static images can be displayed as shown below: -![DeepRob Logo](/assets/logos/favicons/UMich_favicon_dark.png) +![DeepRob Logo]({{ site.baseurl }}/assets/logos/favicons/UMich_favicon_dark.png) ## Project Video @@ -56,10 +62,10 @@ You can display a video with your model's results by either uploading to youtube If you found our work helpful, consider citing us with the following BibTeX reference: ``` -@article{opipari2023deeprob, +@article{opipari2024deeprob, title = {Example Project: A final project template for DeepRob}, - author = {Opipari, Anthony and Zhang, Huijie and Zhu, Jiyue and Desingh, Karthik and Jenkins, Odest Chadwicke}, - year = {2023} + author = {Opipari, Anthony and Du, Xiaoxiao and Tong, Edmond and Lu, Yifu and Richardson, Dalton and Jenkins, Odest Chadwicke}, + year = {2024} } ``` Be sure to update this reference to include your team's author information for correct attribution! diff --git a/reports/how-to.md b/reports/how-to.md new file mode 100644 index 0000000..a937ea4 --- /dev/null +++ b/reports/how-to.md @@ -0,0 +1,377 @@ +--- +layout: project +parent: Reports +title: How-To: Make a project website for DeepRob +description: This is a tutorial showing how you can develop a final project webpage for DeepRob at the University of Michigan. +authors: + - name: Anthony Opipari + social: "https://topipari.com" + affiliation: University of Michigan +--- + +--- + + +#### Table of Contents + +- [Abstract](#abstract) +- [Prerequisites](#prerequisites) +- [Building a Local Clone](#building-a-local-clone) +- [Inspecting an Example Project Page](#inspecting-an-example-project-page) + - [Page Formatting](#page-formatting) + - [Page Title](#page-title) + - [Page Authors](#page-authors) + - [Project Images](#project-images) + - [Project Links](#project-links) + - [Project Videos](#project-videos) + - [Embedded Code](#embedded-code) +- [Adding a New Project Page](#adding-a-new-project-page) +- [Submitting a Pull Request to Publish your Page](#submitting-a-pull-request-to-publish-your-page) +- [Contact for Questions](#contact-for-questions) + + +{: .note-title } +> tl;dr +> +> You will describe the content on your page as a [markdown](https://en.wikipedia.org/wiki/Markdown){: target="_blank" rel="noopener noreferrer"} file (e.g. [this page is itself a markdown file](https://raw.githubusercontent.com/opipari/DeepRobWeb/w25/reports/how-to.md){: target="_blank" rel="noopener noreferrer"}). The markdown file gets converted by an application, called [Jekyll](https://en.wikipedia.org/wiki/Jekyll_%28software%29){: target="_blank" rel="noopener noreferrer"}, into a static [HTML](https://en.wikipedia.org/wiki/HTML){: target="_blank" rel="noopener noreferrer"} file and is served on the internet at a specific domain (e.g. 'deeprob.com') and subdirectory (e.g. '/w25/reports/how-to/'). + + + + + +--- + + + +## Abstract + +This tutorial is written to give a step-by-step tutorial on how your group can develop and design a final project webpage in the DeepRob 'style'. This tutorial involves concepts and tools used in web-development but we will assume no previous web-development experience. 
---


## Prerequisites

Before we can develop a new webpage and style it with our final project content, we need to install a useful set of web-development tools. Specifically, we need the Ruby programming language and the Jekyll application. Jekyll will be used to create a local copy of the DeepRob website that we can develop on top of while designing our new webpage. Jekyll is an application built with Ruby, so we need Ruby to run Jekyll.

**On your local development machine:**
1. Install [Ruby](https://www.ruby-lang.org/en/documentation/installation/){: target="_blank" rel="noopener noreferrer"}
2. Install [Jekyll](https://jekyllrb.com/docs/installation/){: target="_blank" rel="noopener noreferrer"}
3. Clone the DeepRob source code

    ```sh
    git clone -b w25 git@github.com:opipari/DeepRobWeb.git
    ```


{: .highlight }
As part of the Ruby installation, another tool called '[Bundler](https://bundler.io/){: target="_blank" rel="noopener noreferrer"}' should be installed automatically. We'll use Bundler to manage our Ruby dependencies for building the static site with Jekyll.


---

## Building a Local Clone

Now that we have installed our software requirements and cloned the website source code, let's convert that source code into a working static site that we can build on.

Our goal here is to end up with an exact replica of [deeprob.org/w25/](https://deeprob.org/w25/){: target="_blank" rel="noopener noreferrer"} that is entirely contained and served by your local development machine. Once this is working, we can add a new page and modify it locally before pushing those changes into the official [DeepRob repository](https://github.com/opipari/DeepRobWeb){: target="_blank" rel="noopener noreferrer"} as a pull request.

**Here are the steps to build our local site:**

1. Open a terminal window
2. Navigate to the root directory of the cloned w25 repository

    ```sh
    cd DeepRobWeb
    ```
3. Install the Ruby gem dependencies needed to build the static site

    ```sh
    bundle install
    ```
4. Build and serve the static site locally

    ```sh
    bundle exec jekyll serve
    ```

{: .highlight }
If your build fails with an error message involving webrick not being included in your version of Ruby, you may need to run `gem install webrick` before running the `bundle exec` command in step 4 above.

After running the above commands, you should see terminal output similar to what's shown below:

```sh
Configuration file: /path.../DeepRobWeb/_config.yml
            Source: /path.../DeepRobWeb
       Destination: _site/w25/
 Incremental build: disabled. Enable with --incremental
      Generating...
      Remote Theme: Using theme just-the-docs/just-the-docs
                    done in 15.273 seconds.
 Auto-regeneration: enabled for '/path.../DeepRobWeb'
    Server address: http://127.0.0.1:4000/w25/
  Server running... press ctrl-c to stop.
```
This message indicates a few noteworthy things:
1. The build worked without any errors and stored the resulting static site files in the `_site/w25/` directory
2. The build process took 15.273 seconds (it will be slightly faster on future builds)
3. The build uses `Auto-regeneration`, which means that as long as this process keeps running, any changes to the source code in the `DeepRobWeb/` folder will automatically trigger the build process and update the served files
4. 
The server is running and hosting our static site at the location: [http://127.0.0.1:4000/w25/](http://127.0.0.1:4000/w25/){: target="_blank" rel="noopener noreferrer"} +5. We can stop the server by pressing `ctrl-c` or by closing the terminal window + +{: .highlight } +**Note: This server is accessible only on your local development machine.** In other words, it is not publishing anything to the public internet. This is useful because it allows you to develop locally before publishing the final page to the public internet. + +Now that the server is running, try navigating to the local site by going to the address: [http://127.0.0.1:4000/w25/](http://127.0.0.1:4000/w25/){: target="_blank" rel="noopener noreferrer"} in your browser. **If this doesn't work, or the website doesn't match the one published at [https://deeprob.org/w25/](https://deeprob.org/w25/){: target="_blank" rel="noopener noreferrer"}, please let [Anthony](mailto:topipari@umich.edu) know.** + + +--- + + +## Inspecting an Example Project Page + +Now that we have a local copy of the website, let's take a look at an example page for inspiration and direction on how we can design and structure our bew webpage. Specifically, looking into the [`reports/`](https://github.com/opipari/DeepRobWeb/tree/w25/reports){: target="_blank" rel="noopener noreferrer"} directory, we can see the source code used to generate this webpage as well as a simple example project page located at [`reports/example.md`](https://github.com/opipari/DeepRobWeb/blob/w25/reports/example.md){: target="_blank" rel="noopener noreferrer"}. + + +To get a better sense of how this example page works, let's open the local page side-by-side with its source code. The page can be opened at: [http://127.0.0.1:4000/w25/reports/example/](http://127.0.0.1:4000/w25/reports/example/){: target="_blank" rel="noopener noreferrer"}. The local page's source code, `reports/example.md`, can be opened with your favorite text editor. If all goes as planned, you should see a development environment similar to what's shown below: + +![Development environment for example webpage]({{ site.baseurl }}/assets/projects/reports/how-to/development_environment.webp) + +Next, let's step through the page elements one-by-one to understand how the markdown content is mapped onto HTML elements. + + +### Page Formatting + +Taking a look at `reports/example.md`, the first thing we see is this header block: + +```yaml +--- +layout: project +parent: Reports +title: Example Project: A final project template for DeepRob +description: This is a final project report for DeepRob at the University of Michigan. +authors: + - name: Anthony Opipari + social: "https://topipari.com" + affiliation: University of Michigan + - name: Xiaoxiao Du + social: "https://xiaoxiaodu.net/" + affiliation: University of Michigan + - name: Edmond Tong + affiliation: University of Michigan + - name: Yifu Lu + affiliation: University of Michigan + - name: Dalton Richardson + affiliation: University of Michigan + - name: Odest Chadwicke Jenkins + social: "https://ocj.name/" + affiliation: University of Michigan +--- +``` + +What's going on here? Well, this block of code is called ['front matter'](https://jekyllrb.com/docs/front-matter/){: target="_blank" rel="noopener noreferrer"} and is used by Jekyll to format the markdown content into a webpage. Front matter is essentially just a block of YAML code, storing key-value pairs of variables. 
Under the hood, while Jekyll is converting our markdown into an HTML page, it will use these variables for deciding how to organize the page's eventual HTML file. + +**For formatting your project webpage, there are two required components you must include in your page's front matter:** + +```yaml +layout: project +parent: Reports +``` + + +
+Click here for more details, if you're curious!> + +The `layout` key is a generic front matter key specifying which HTML *scaffold* (or *layout*) to use when converting the markdown content *into* HTML. I've predefined an HTML structure for these project pages, which is defined by the `project` name. For those extra curious, you can inspect the `project` HTML layout definition in [`_layouts/project.html`](https://github.com/opipari/DeepRobWeb/blob/w25/_layouts/project.html){: target="_blank" rel="noopener noreferrer"}. + +The `parent` key is used by our DeepRob theme, which is based on the [Just the Docs](https://just-the-docs.com/){: target="_blank" rel="noopener noreferrer"} theme, as a bookkeeping flag to ensure Jekyll doesn't clutter the navigation bar on the left of every page. Specifically, by using this key and assigning it a value of `Reports` we are telling Jekyll that the example project page shouldn't be shown on the navigation bar and that it belongs as a child page of the reports index at [`/w25/reports/`](https://github.com/opipari/DeepRobWeb/blob/w25/reports/index.md){: target="_blank" rel="noopener noreferrer"}. + + +
### Page Title

Within the page front matter, we specify the title of the page. In the case of the example page:

```yaml
title: Example Project&#58; A final project template for DeepRob
```

Setting this value for `title` results in the title, `Example Project: A final project template for DeepRob`. The provided layout will automatically position and format the title for you at the top of your project page in HTML. You'll notice we use `&#58;` where a colon is expected. This is because the colon, `:`, is a reserved character in YAML front matter, but the character can be inserted with the proper escape (`&#58;`).


### Page Authors

The final key-value pair in our example front matter defines the page's authors. Specifically, this block:

```yaml
authors:
  - name: Anthony Opipari
    social: "https://topipari.com"
    affiliation: University of Michigan
  - name: Xiaoxiao Du
    social: "https://xiaoxiaodu.net/"
    affiliation: University of Michigan
  - name: Edmond Tong
    affiliation: University of Michigan
  - name: Yifu Lu
    affiliation: University of Michigan
  - name: Dalton Richardson
    affiliation: University of Michigan
  - name: Odest Chadwicke Jenkins
    social: "https://ocj.name/"
    affiliation: University of Michigan
```

Our provided project layout will format this list of authors into an equally-spaced row. Notice that the `social` attribute for each author is parsed into a proper weblink and that this attribute is optional: any author without an associated `social` attribute does not link to any external page.

### Project Images

Including a captivating image after the author list is a good way to summarize your project with a single, information-dense visual element. The following code is used for placing the `deeprob.gif` file, located in the `/w25/assets/projects/reports/example/` subdirectory, within the example project page:

```html
+Teaser Figure +
```

Notice that a CSS class named `center-image` is applied as a parent element of our `img` element. The details can be ignored, but we provide this class to help you with centering images.

If we don't care about centering, we can more easily include images using the following command:

```markdown
![Teaser Figure]({{ site.baseurl }}/assets/projects/reports/example/deeprob.gif)
```


### Project Links

We provide default styling for any project-specific links you would like to include. For example, to include links to your report or your codebase, you can include the following code block below your front matter block:

```html

```

where the placeholder link values are replaced with actual web links. This block of code results in the following buttons:
+ +
+ + +### Project Videos + +If you have any video results, those can be displayed either by using HTML video tags and hosting the video file with the `assets/` directory or by hosting the video on a service like YouTube. YouTube provides iframe HTML code that can be directly embedded into webpages as follows: + +```html +
+
+ +
+
```

Placing the above code into your markdown file will result in the following embedded iframe:
+
+ +
+
+ + +### Embedded Code + +If you'd like to embed code, markdown provides [syntax highlighting](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/creating-and-highlighting-code-blocks){: target="_blank" rel="noopener noreferrer"}. Simply wrap your code in backticks (\`) for in-line code or for code blocks, wrap your code in backtick-triplets. + +```` +``` +# code goes here +``` +```` + +### Other Features! + +Feel free to experiment with embedding content into your markdown webpage. For example, [plotly figures](https://plotly.com/chart-studio-help/embed-graphs-in-websites/){: target="_blank" rel="noopener noreferrer"} can be embedded into HTML--even [3D figures](https://plotly.com/python/3d-charts/){: target="_blank" rel="noopener noreferrer"}! + +--- + +## Adding a New Project Page + +Now that we've [inspected an example project page](#inspecting-an-example-project-page), we're ready to design our own! + +**Here are the required steps:** + +1. Create a new markdown file (`.md`) and place it within the `reports/` directory +2. Fill in the markdown file's front matter block as described [above](#page-formatting) +3. Create a new asset directory with the same name as your markdown file and place the directory within `assets/projects/reports/` +4. Place any image, video, PDF, or other file assets you would like to host into your new asset directory +5. Add content to your webpage + +--- + +## Submitting a Pull Request to Publish your Page + +Now that you've made a webpage, let's make a pull request to the DeepRobWeb repository to publish your webpage on the internet. + +**Here are the required steps:** + +1. Fork the [DeepRobWeb](https://github.com/opipari/DeepRobWeb){: target="_blank" rel="noopener noreferrer"} repository + + ![Image of fork button on github graphic user interface]({{ site.baseurl }}/assets/projects/reports/how-to/fork_button.webp) + +2. Make sure to uncheck the box asking if you want to copy only the `main` branch + + ![Image of fork options on github graphic user interface]({{ site.baseurl }}/assets/projects/reports/how-to/fork_options.webp) + +3. Add your fork as a remote + + ```sh + git remote add projectfork git@github.com:/DeepRobWeb.git + ``` + +4. Now sync your project with `origin` (i.e. the remote [DeepRobWeb](https://github.com/opipari/DeepRobWeb){: target="_blank" rel="noopener noreferrer"} repository) + + ```sh + git pull origin w25 + ``` + +5. Now add any asset files and commit your changes + + ```sh + git add && git commit -m "Adding my final project webpage" + ``` + +6. We're now ready to push your changes into your forked repository + + ```sh + git push projectfork w25 + ``` + +7. In the GitHub GUI look for and click the 'Compare & Pull request' button + + ![Image of compare button on github graphic user interface]({{ site.baseurl }}/assets/projects/reports/how-to/compare_button.webp) + +8. After clicking this button, you can submit your pull request! After this, I (Anthony) will review the changes and then publish them to the official repository and webpage. + + ![Image of pull request button on github graphic user interface]({{ site.baseurl }}/assets/projects/reports/how-to/pull_request_button.webp) + + +--- + +## Contact for Questions + +If you have any questions, feel free to contact [Anthony](mailto:topipari@umich.edu). 
+ +--- + + +[Back to Top](#) diff --git a/reports/index.md b/reports/index.md index f944f25..e2b6ffe 100644 --- a/reports/index.md +++ b/reports/index.md @@ -3,19 +3,39 @@ layout: page title: Final Reports description: >- Final project reports listing. -nav_order: 7 +nav_order: 9 has_children: false has_toc: false -permalink: /reports/ --- # Final Project Reports {:.no_toc} +### [Learning-Based Segmentation & Grasping on FETCH with PointNet++ & AnyGrasp]({{ site.baseurl }}/reports/FETCH-GRASP/) +Marzuk Kalluparamban, Hemanth Murali, Maithreyan Ganesh, and Jyotishka Dutta Gupta +### [DOPE-Plus: Enhancements in Feature Extraction and Data Generation for 6D Pose Estimation]({{ site.baseurl }}/reports/DOPE-Plus/) +Jeffrey Chen, Yuqiao Luo, and Longzhen Yuan -### [Example Project: A final project template for DeepRob](/reports/example/) -Anthony Opipari, Huijie Zhang, Jiyue Zhu, Karthik Desingh, and Odest Chadwicke Jenkins +### [DiseasedCNN-Lite: Leveraging Transfer Learning and Knowledge Distallation for Leaf Disease Classification]({{ site.baseurl }}/reports/leaf/) +David Smith, Jess Wu, and William Hasey +### [(Eye) BAGS: Bundle-Adjusting Gaussian Splatting]({{ site.baseurl }}/reports/BAGS/) +Ruben Fonseca and Sacchin Sundar + +### [Improving the Robustness of Object Detection Under Hazardous Conditions]({{ site.baseurl }}/reports/deweathered/) +Rui Li, Yuqing Luo, Inbum Park, and Ziyang Xuan + +### [Learning-Based Segmentation & Grasping on FETCH with PointNet++ & AnyGrasp]({{ site.baseurl }}/reports/FETCH-GRASP/) +Marzuk Kalluparaman, Hemanth Murali, Maithreyan Ganesh, and Jyotishka Dutta Gupta + +### [Human Aware Motion Planning for 6 DoF Robot Arm in Painting Application]({{ site.baseurl }}/reports/aura/) +Eric Chen and Emily Wu + +### [Convergence Acceleration For DiffusionDet: On PROPS Dataset]({{ site.baseurl }}/reports/diffdet/) +Gongxing Yu, Liangkun Sun, and Yang Lyu + +### [Dynamic Weather-Aware Lane Detection]({{ site.baseurl }}/reports/lane/) +Justin Boverhof, Joseph Fedoronko, Anay Moitra, and Andrew Rodriguez diff --git a/reports/lane.md b/reports/lane.md new file mode 100644 index 0000000..d374413 --- /dev/null +++ b/reports/lane.md @@ -0,0 +1,62 @@ +--- +layout: project +parent: Reports +title: "Dynamic Weather-Aware Lane Detection" +description: "Final project report for DeepRob at the University of Michigan." + +authors: + - name: "Justin Boverhof" + affiliation: "University of Michigan" + year: "Robotics" + - name: "Joseph Fedoronko" + affiliation: "University of Michigan" + year: "Robotics" + - name: "Anay Moitra" + affiliation: "University of Michigan" + year: "Computer Science" + - name: "Andrew Rodriguez" + affiliation: "University of Michigan" + year: "Robotics" +--- + + + +
+Teaser Figure +
## Abstract

Lane detection is a critical task for autonomous driving, enabling safe navigation by identifying road boundaries and lane structures. In this project, we enhance the Ultra Fast Lane Detection (UFLD) framework to improve its robustness under challenging conditions, such as adverse weather and low visibility. Our contributions include the integration of a Feature Pyramid Network (FPN) for multi-scale feature extraction, a dynamic weather prompting mechanism to adapt the model based on current environmental conditions, and an unsupervised weather discovery system to eliminate the need for manual weather labeling. Together, these improvements enable real-time, weather-adaptive lane detection, demonstrating strong potential for practical deployment in autonomous systems operating in diverse environments.

## Our Extensions
To address our concerns about lane detection in adverse weather conditions, we implemented an FPN layer after the backbone with the goal of improving on the original ResNet-18 feature extraction. Additionally, we experimented with dynamic prompting as another solution to lane detection in poor conditions. We designed a weather condition module that encodes the current weather as a learnable embedding. Each weather type (clear, rain, snow, fog) is initially represented as a one-hot vector, which is projected into a dense embedding space through a small multi-layer perceptron (MLP). This weather embedding is then concatenated with the feature map output from the backbone network. We introduced a fusion MLP to process the combined features and extended the parsingNet lane detection model to accept a weather_condition input, ensuring weather-aware features throughout. The data loader was updated to supply weather prompts during training and inference, enabling dynamic prompting that improves detection across conditions without needing separate models. A simplified sketch of this module is shown below, following the results.

## Results

To validate our methodology, we decided to train two models: one without the addition of a simple weather-aware classifier and one with it. Both were trained for 2 epochs. We then used the built-in evaluation command that came with the UFLD paper to report precision, recall, and F1 scores. For our tests, we were able to evaluate the model without any additions in all the different environments, but computed only a basic score for the model with additions.

### Table 1. Precision, Recall, and F1 Score for the Colab Dynamic Prompting Demonstration.

| Experiment | Precision | Recall | F1 |
|----------------------------|-----------|--------|-------|
| Colab Dynamic Prompting | 0.9876 | 0.9853 | 0.9890 |

To validate our dynamic prompting and clustering pipeline, we ran a lightweight experiment on 1000 CULane images. Features were clustered into four groups, and a simple weather-aware classifier trained on these clusters achieved high performance, confirming our method's effectiveness at modeling weather variations with limited supervision.
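The following is a minimal PyTorch sketch of the weather-conditioning design described above (one-hot weather, MLP embedding, concatenation with backbone features, fusion MLP). The class name `WeatherPrompt`, the pooled 512-dimensional feature, and the embedding size are illustrative assumptions, not our released implementation.

```python
import torch
import torch.nn as nn

WEATHER_TYPES = ["clear", "rain", "snow", "fog"]  # one-hot weather conditions


class WeatherPrompt(nn.Module):
    """Illustrative weather-conditioning module: one-hot weather -> dense
    embedding -> fused with backbone features before the lane head."""

    def __init__(self, feat_dim=512, embed_dim=64):
        super().__init__()
        # Small MLP projecting the one-hot weather vector to a dense embedding
        self.embed = nn.Sequential(
            nn.Linear(len(WEATHER_TYPES), embed_dim),
            nn.ReLU(),
            nn.Linear(embed_dim, embed_dim),
        )
        # Fusion MLP applied after concatenating pooled features and embedding
        self.fuse = nn.Sequential(
            nn.Linear(feat_dim + embed_dim, feat_dim),
            nn.ReLU(),
        )

    def forward(self, feats, weather_onehot):
        # feats: (B, C, H, W) backbone output; pooled to (B, C) for this sketch
        pooled = feats.mean(dim=(2, 3))
        w = self.embed(weather_onehot)            # (B, embed_dim)
        fused = self.fuse(torch.cat([pooled, w], dim=1))
        return fused                              # passed on to the lane-detection head


# Example usage with random tensors
feats = torch.randn(2, 512, 9, 25)                # fake ResNet-18 feature map
weather = torch.eye(len(WEATHER_TYPES))[[0, 2]]   # "clear" and "snow"
out = WeatherPrompt()(feats, weather)
print(out.shape)  # torch.Size([2, 512])
```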
+ +## Citation + +``` +@article{DynamicWeatherLaneDetection2025deeprob, + title = {FPN Lane Detection}, + author = {Boverhof, Justin and Fedoronko, Joseph and Moitra, Anay and Rodriguez, Andrew}, + year = {2025} +} +``` diff --git a/reports/leaf.md b/reports/leaf.md new file mode 100644 index 0000000..a8f96ed --- /dev/null +++ b/reports/leaf.md @@ -0,0 +1,107 @@ +--- +layout: project +parent: Reports +title: DiseasedCNN-Lite: Leveraging Transfer Learning and Knowledge Distallation for Leaf Disease Classification +description: This is a final project report for DeepRob at the University of Michigan. +authors: + - name: David Smith + email: smitd@umich.edu + affiliation: University of Michigan + year: 3rd Year Robotics B.S.E and CS B.S.E + hometown: Hillsboro, OR + - name: Jess Wu + email: jessyw@umich.edu + affiliation: University of Michigan + year: 3rd Year Robotics B.S.E and CS B.S.E + hometown: Reston, VA + - name: William Hasey + email: whasey@umich.edu + affiliation: University of Michigan + year: 3rd Year Robotics B.S.E + hometown: Ypsilanti, MI +--- + +
+Teaser Figure +
+ + + +## Abstract + +Identification of diseased crops through the symptoms displayed on their leaves can help farmers manage large-scale operations. Integrating deep learning with robotics provides a promising way for farmers to maintain their crops in an efficient and cost-effective manner. However, deep learning models are often too large and computationally complex to be deployed in agricultural robots. Previous research has been conducted on small datasets and has produced models that can identify diseases for a specific species of crop. Drawing from previous research on the study of apple leaves and associated diseases, along with studies on knowledge distillation, this work proposes a lightweight model that can classify 17 common diseases spanning 14 different crop species. + +## Motivation and Original Paper + +Agricultural robotics is a fast-growing area of research---in particular, many researchers are focusing on using automation to perform tedious everyday tasks such as weeding, scouting, and harvesting. Properly trained robots would be less prone to missing important information when working fields, and would not fall victim to dangerous environmental factors such as extreme heat, pesticides, and hazardous machinery. As robotics has become a more prominent field in engineering, interest in agricultural robotics has spiked, and deep learning has been a heavily researched area relating to this topic. However, as interest in this specific area of robotics has increased, the requirements for a practical machine learning model have become narrower. Any model created must be highly accurate, generalizable to real-world data, and require as little memory and computational power as possible, such that a robot's ability to perform tasks remains more efficient than a human's. + +Image processing and classification of diseased leaves has been researched before, and a majority of studies are specific to one crop (apples and tomatoes are by far the most common). Many different machine learning architectures have been attempted---some deep models, some not---and most have been able to achieve test accuracies upwards of 90%-95%. These studies focused on one crop species and had relatively small datasets of a few thousand training images creating a lack of versatility. + +DeepCNN is a convolutional neural network that can identify three different apple leaf diseases, as well as a healthy leaf. The study laid the foundation for our work on disease detection. After training for 1000 epochs, DeepCNN achieved an accuracy of 98% on a dataset consisting of 3,171 images (2,228 training, 634 validation, 319 testing). To lay the foundation for our model, we attempted to reproduce the final results. The study provided the dataset used and outlined the exact architecture of DeepCNN, including specific dimensions for each layer; however, specifics for the data augmentation performed were not provided. + +## Our Extensions + +In our attempt to reproduce these results, we discovered that DeepCNN would drastically overfit to the data when trained for 1,000 epochs. Instead, we achieved 96% testing accuracy after 250 epochs of training, and training for any longer would lead to a decrease in overall model performance. We theorize that this discrepancy could be a result of the differing data augmentation. 
Since the operations performed on the images were not detailed, we researched and used relatively common operations (horizontal and vertical flips, shearing, scaling, shifting) and applied them to each training image with a 40% probability. Data augmentation is used to prevent overfitting, but its misuse can harm model performance and actually increase the chance of overfitting. Therefore, we believe that our data augmentation was substantially different, and led to the drastic change in accuracy as the number of training epochs passed 250. + +One limiting factor of the DeepCNN network proposed was that the dataset used only contained four classes on a single species. Therefore, we first aimed to build a network that could identify many more species and diseases at a similar rate. To do this, we used the Plant Village dataset. This dataset contains 54,305 images spanning 14 different species of crop and 17 common diseases, amounting to 38 separate classes. We split the dataset into training, testing, and validation using the standard 70-20-10 split. + +As a preemptive measure to prevent overfitting, we probabilistically applied different augmentation operations to each training image. Given that there were only approximately 1,000 images per class in the Plant Village training set, we determined that data augmentation would be our most effective tool against overfitting. Our data augmentation consisted of shearing, scaling, horizontal and vertical flipping, and elastic distortion. + +To determine which pretrained model to use as a backbone for transfer learning, we benchmarked the performance of seven commonly used models against the Plant Village dataset. This process included adding one fully connected layer to the end of each pretrained model, and then training for 20 epochs. EfficientNet-B0 and ResNet50 performed best. Both reached a loss below 3.0 and a validation accuracy above 80%. After this benchmarking process, we trained DiseasedCNN for three epochs with both EfficientNet-B0 and ResNet50 to determine which model meshed best with our fine-tuning layers. We determined that ResNet50's feature extraction was better suited for our needs, as EfficientNet-B0 achieved a test accuracy of 61.51% and ResNet50 achieved a test accuracy of nearly 75%. Therefore, we decided to use ResNet50 as our pretrained backbone for DiseasedCNN. + +## DiseasedCNN + +DiseasedCNN is the powerful model that we built to serve as a teacher during the knowledge distillation process. It relies on ResNet50's feature extraction to provide a basis for its classification abilities, and utilizes fine-tuning layers to calibrate its prediction to our dataset. It also employs a rectified linear unit (ReLU) activation function to provide nonlinearity, and softmax is used on the output to produce a probabilistic prediction. + +The foundation of DiseasedCNN is a ResNet50 backbone with the last three layers (adaptive average pooling, flattening, and fully connected) removed. This allows us to feed the output of the network into our fine-tuning layers, while maintaining the spatial information that the network has extracted. + +The fine-tuning layer architecture is as follows: two convolutional layers with ReLU activations on the outputs, one batchnorm layer and one adaptive average pooling layer, and two fully connected layers, with ReLU activation after the first layer and softmax after the second. 
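To make the layer stack above concrete, here is a rough PyTorch sketch that mirrors the described structure: a truncated ResNet50 backbone followed by two convolutional layers with ReLU, batch normalization, adaptive average pooling, and two fully connected layers. The channel widths are illustrative assumptions rather than DiseasedCNN's exact dimensions; only the 38-class output matches the Plant Village setup described here.

```python
import torch
import torch.nn as nn
from torchvision import models


class DiseasedCNNSketch(nn.Module):
    """Rough sketch: ResNet50 with its pooling/classification head removed,
    followed by fine-tuning layers in the order described in the report."""

    def __init__(self, num_classes=38):
        super().__init__()
        # In practice, load ImageNet-pretrained weights (e.g. weights="IMAGENET1K_V2")
        resnet = models.resnet50(weights=None)
        # Drop ResNet50's average pooling and fully connected head to keep spatial features
        self.backbone = nn.Sequential(*list(resnet.children())[:-2])
        self.finetune = nn.Sequential(
            nn.Conv2d(2048, 512, kernel_size=3, padding=1), nn.ReLU(),
            nn.Conv2d(512, 256, kernel_size=3, padding=1), nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(256, 128), nn.ReLU(),
            nn.Linear(128, num_classes),  # softmax applied in the loss / at inference
        )

    def forward(self, x):
        return self.finetune(self.backbone(x))


logits = DiseasedCNNSketch()(torch.randn(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 38])
```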
+ +## Results + +DiseasedCNN trained for 50 epochs on the Plant Village dataset, with model validation performed every 13 batches. During training, we used an Adam optimizer with a learning rate of 0.0005, a beta_1 value of 0.9, and a beta_2 value of 0.999. In addition, we used an exponential learning rate scheduler with a gamma factor of 0.9. Our loss function for DiseasedCNN's training was the standard PyTorch cross-entropy loss function. The model was trained for approximately two hours, and while the average epoch loss remained relatively high, the model reached a final validation accuracy of 93%. + +DiseasedCNN's inference time is relatively high compared to other models of the same size. We would like to see this value within the range of 10 to 30 milliseconds. This is particularly important for any integration with agricultural robotics, because a longer inference time means that it takes more energy and computational power to run inference. Maximizing battery life is a major aspect of agricultural robotics. Furthermore, DiseasedCNN is too large for many agricultural robots and drones. This is largely due to its ResNet50 backbone, which provides the necessary feature extraction and enables such high classification accuracy. Overall, DiseasedCNN is a good start, but in order to be a practical solution in the field of optimized agricultural robotics, DiseasedCNN would need to see a drastic reduction in size and inference time. + +## DiseasedCNN-Lite + +DiseasedCNN-Lite is a much smaller model that we built to serve as a student during the knowledge distillation process. Instead of relying on a large, pretrained backbone such as ResNet50 to perform feature extraction, it extracts a small number of foundational features from a given image, while focusing on learning to predict the output of its teacher, DiseasedCNN. Since this model is much smaller than DiseasedCNN, we opted to use a Leaky ReLU activation function to prevent any gradients from vanishing during the training process. + +## Results + +DiseasedCNN-Lite trained for 100 epochs on the Plant Village dataset, with model validation performed at the end of each epoch. During training, we used many of the same hyperparameters as DiseasedCNN. We used an Adam optimizer with a learning rate of 0.0005, a beta_1 value of 0.9, and a beta_2 value of 0.999. We also used a learning rate scheduler to help optimize the training of the student model, with a gamma factor of 0.9. However, for DiseasedCNN-Lite we added a weight decay factor of 0.00001 to avoid large weights and prevent overfitting. Furthermore, DiseasedCNN-Lite trained with a T value of 4, as we chose to soften the teacher model's probability distribution, and an alpha value of 0.7, putting more emphasis on hard loss than soft loss. + +DiseasedCNN-Lite's inference time was much faster than DiseasedCNN's, and will likely require significantly less energy and computational power to run on an agricultural robot. In addition to this, DiseasedCNN-Lite was 334 times smaller than its teacher, DiseasedCNN. The total size of the model, along with the shortened inference time, makes DiseasedCNN-Lite a more practical solution than DiseasedCNN, and opens the door to integration with agricultural robots and drones. Although a test accuracy of 87% is relatively low compared to other existing models (which routinely achieve accuracies above 90%), DiseasedCNN-Lite provides a strong foundation for future research into training smaller student models. 
+ +We believe that with slight changes to hyperparameters, and many more epochs of training, DiseasedCNN-Lite could increase its capabilities. We would have to be careful not to overfit to our dataset, and to prevent this our best option would likely be to increase our data augmentation. To best support an increase in DiseasedCNN-Lite's abilities, we would continue to calibrate DiseasedCNN in order to optimize its accuracy. Although unlikely, it is possible that DiseasedCNN-Lite could achieve a higher test accuracy than its teacher, DiseasedCNN. However, a more accurate teacher model would only be more helpful during training. + +
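For reference, the sketch below shows a standard soft/hard knowledge-distillation objective using the stated T = 4 and alpha = 0.7, with alpha weighting the hard loss as described. This is one common formulation under those assumptions, not our verbatim training code.

```python
import torch
import torch.nn.functional as F


def distillation_loss(student_logits, teacher_logits, labels, T=4.0, alpha=0.7):
    """Common KD objective: alpha * hard CE loss + (1 - alpha) * soft loss."""
    # Hard loss: cross-entropy against ground-truth labels
    hard = F.cross_entropy(student_logits, labels)
    # Soft loss: KL divergence between temperature-softened distributions,
    # scaled by T^2 to keep gradient magnitudes comparable
    soft = F.kl_div(
        F.log_softmax(student_logits / T, dim=1),
        F.softmax(teacher_logits / T, dim=1),
        reduction="batchmean",
    ) * (T * T)
    return alpha * hard + (1.0 - alpha) * soft


# Example usage with random logits for a 38-class problem
student = torch.randn(8, 38, requires_grad=True)
teacher = torch.randn(8, 38)
labels = torch.randint(0, 38, (8,))
loss = distillation_loss(student, teacher, labels)
loss.backward()
```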
+Training Loss +Validation Accuracy +
+ + +## Conclusions + +DiseasedCNN-Lite achieved a respectable accuracy of 87%, while being an extremely lightweight model totaling only 390 kilobytes in size. Compared to the capabilities of its teacher, DiseasedCNN-Lite was capable of similar accuracies while shrinking the size of the model by a factor of 334. Overall, this model is still not ready to be deployed and tested with agricultural robots and drones, but it lays a solid foundation for future work. + +One of the main limitations of DiseasedCNN-Lite is that it can only predict diseases given an image containing only one leaf. A potential solution to this problem is to use a vision transformer as a pretrained backbone, and teach DiseasedCNN-Lite to not only identify diseased leaves, but also locate leaves with bounding boxes if given an image with multiple leaves. We would also like to increase the test accuracy of DiseasedCNN-Lite, and perform tests on how well it generalizes to real-world data. The next step after those improvements is to deploy the model onto an agricultural robot or drone, and test its capabilities in the real world. + +## Citations + +If you found our work helpful, consider citing us with the following BibTeX reference: + +``` +@article{2025DiseasedCNN-Lite, + title = {DiseasedCNN-Lite: Leveraging Transfer Learning and Knowledge Distallation for Leaf Disease Classification}, + author = {Smith, David and Wu, Jess and Hasey, William}, + year = {2025} +} +``` + +## Contact + +If you have any questions, feel free to contact any of [David Smith, Jess Wu, or William Hasey](mailto:smitd@umich.edu?cc=jessyw@umich.edu,whasey@umich.edu). diff --git a/staff.md b/staff.md index f66c6b2..7bdf67c 100644 --- a/staff.md +++ b/staff.md @@ -2,14 +2,15 @@ layout: page title: Staff description: A directory of the teaching staff for Deep Learning for Robot Perception at the University of Michigan. +nav_order: 10 --- # Deep Rob Course Staff -
-
+--- -# Instructors +## Instructors +
{% assign instructors = site.staffers | where: 'role', 'Instructor' |sort: 'order' %} {% for staffer in instructors %} @@ -17,19 +18,57 @@ description: A directory of the teaching staff for Deep Learning for Robot Perce {% endfor %}
+ +## Graduate Student Instructor +
+ +{% assign gsis = site.staffers | where: 'role', 'Graduate Student Instructor' |sort: 'order' %} +{% assign num_gsis = gsis | size %} +{% if num_gsis != 0 %} + +{% for staffer in gsis %} +{{ staffer }} +{% endfor %} +{% endif %} + +
+ +## Instructional Assistants
-# Research Associate +{% assign ias = site.staffers | where: 'role', 'Instructional Assistant' | sort: 'order' %} +{% for staffer in ias %} +{{ staffer }} +{% endfor %} + +
+ +## Advising Faculty +
-{% assign research_associates = site.staffers | where: 'role', 'Research Associate' %} -{% assign num_research_associates = research_associates | size %} -{% if num_research_associates != 0 %} +{% assign advising_faculty = site.staffers | where: 'role', 'Advising Faculty' %} +{% assign num_advising_faculty = advising_faculty | size %} +{% if num_advising_faculty != 0 %} -{% for staffer in research_associates %} +{% for staffer in advising_faculty %} {{ staffer }} {% endfor %} {% endif %}
+ + +# Office Hours Schedule +{: #weekly-schedule } + +
+{: .highlight } +**The schedule of instructor office hours, including the in-person locations, is provided in the following Google calendar.** +
+ +
+{: .note } +**For accessing office hours virtually, please refer to the calendar for each instructor's preferred Zoom link. If no Zoom link is listed, please join their office hours queue and share your personal Zoom link as your location.**
+ diff --git a/syllabus.md b/syllabus.md index c80d15e..763135e 100644 --- a/syllabus.md +++ b/syllabus.md @@ -5,9 +5,10 @@ description: Course policies and information pertaining to Deep Learning for Rob nav_order: 2 --- -# Course Syllabus +# Course Syllabus: Deep Learning for Robot Perception {:.no_toc} + ## Table of contents {: .no_toc .text-delta } @@ -16,14 +17,18 @@ nav_order: 2 --- +{: .highlight } +**Note: This course website and syllabus is still under development and is subject to change.** + ## About Robots need to see and understand their world to be able to interact with objects and perform useful tasks autonomously. Perception is the essential first step in the process for endowing robots to perform autonomously. Autonomous robots need to make sense of their sensory observations to represent the world around them – and enable their reasoning and action to a goal. Visual perception with cameras as sensors has matured due to the recent advancements in neural networks – which is especially true for performing visual recognition tasks such as object classification, detection, pose estimation, grasp pose detection, etc. -This course aims to cover the necessary background of neural-network-based deep learning for robot perception – building on advancements in computer vision and enabling – for enabling robots to dexterously manipulate physical objects. During the first part of this course, students will learn to implement, train and debug their own neural networks. During the second part of this course, students will explore recent emerging topics in deep learning for robot perception and manipulation. This exploration will include analysis of research publications in the area, building up to reproducing one of these publications for implementation as a final course project. +This course aims to cover the necessary background of neural-network-based deep learning for robot perception – building on advancements in computer vision and enabling – for enabling robots to dexterously manipulate physical objects. During the first part of this course, students will learn to implement, train and debug their own neural networks. During the second part of this course, students will explore recent emerging topics in deep learning for robot perception and manipulation. This exploration will include analysis of research publications in the area, building up to reproducing and implementing state-of-the-art deep learning approaches as a final course project. 
This course builds on and is indebted to these existing courses (as a β€œstar” and a "fork" in the open source sense): -- [University of Michigan - ROB 498-002 / 599-009: Deep Learning for Robot Perception](/w23/){: target="_blank" rel="noopener noreferrer"} instructed by [Anthony Opipari](https://web.eecs.umich.edu/~justincj/){: target="_blank" rel="noopener noreferrer"}, [Chad Jenkins](https://ocj.name/){: target="_blank" rel="noopener noreferrer"}, and [Karthik Desingh](https://karthikdesingh.com/){: target="_blank" rel="noopener noreferrer"} +- [University of Michigan - ROB 498-011 / 599-011: Deep Learning for Robot Perception](/w24/){: target="_blank" rel="noopener noreferrer"} instructed by [Xiaoxiao Du](https://xiaoxiaodu.net){: target="_blank" rel="noopener noreferrer"}, [Anthony Opipari](https://topipari.com/){: target="_blank" rel="noopener noreferrer"}, and [Chad Jenkins](https://ocj.name/){: target="_blank" rel="noopener noreferrer"} +- [University of Michigan - ROB 498-002 / 599-009: Deep Learning for Robot Perception](/w23/){: target="_blank" rel="noopener noreferrer"} instructed by [Anthony Opipari](https://topipari.com/){: target="_blank" rel="noopener noreferrer"}, [Chad Jenkins](https://ocj.name/){: target="_blank" rel="noopener noreferrer"}, and [Karthik Desingh](https://karthikdesingh.com/){: target="_blank" rel="noopener noreferrer"} - [University of Michigan - EECS 498-007 / 598-005: Deep Learning for Computer Vision](https://web.eecs.umich.edu/~justincj/teaching/eecs498/WI2022/){: target="_blank" rel="noopener noreferrer"} instructed by [Justin Johnson](https://web.eecs.umich.edu/~justincj/){: target="_blank" rel="noopener noreferrer"} - [Stanford - CS231n: Deep Learning for Computer Vision](http://cs231n.stanford.edu/index.html){: target="_blank" rel="noopener noreferrer"} instructed by [Fei-Fei Li](https://profiles.stanford.edu/fei-fei-li){: target="_blank" rel="noopener noreferrer"} and [Andrej Karpathy](https://karpathy.ai/){: target="_blank" rel="noopener noreferrer"} @@ -48,76 +53,134 @@ The second half of the course will switch to seminar style covering following ad ## Prerequisites - Strongly encouraged prerequisites: - - Programming: ROB 320, EECS 281, or equivalent - Linear Algebra: ROB 101, MATH 214, MATH 217, or equivalent + - Multivariable Calculus (Math 215 or equivalent) + - Systems Programming and Algorithms: ROB 320, EECS 281, or equivalent - Recommended prerequisites: - Prior experience with the [Python programming language](https://www.python.org/){: target="_blank" rel="noopener noreferrer"} is recommended. - Familiarity with gradients and how to calculate them from vector calculus. - Familiarity with random variables and probability distributions from probability theory. - Familiarity with concepts from machine learning (e.g. EECS 445) will be helpful. +## Programming + +This course will primarily use Python and PyTorch + ## Textbook There is no required textbook for this course, however optional readings will be suggested from the textbook, ["Deep Learning" by Ian Goodfellow and Yoshua Bengio and Aaron Courville](https://www.deeplearningbook.org){: target="_blank" rel="noopener noreferrer"}. 
+For additional references, consider the following textbooks: + - "[Introduction to Robotics and Perception](https://www.roboticsbook.org/){: target="_blank" rel="noopener noreferrer"}" by Frank Dellaert and Seth Hutchinson + - "[Robotics, Vision and Control](https://link.springer.com/book/10.1007/978-3-642-20144-8){: target="_blank" rel="noopener noreferrer"}" by Peter Corke + - "[Computer Vision: Algorithms and Applications](http://szeliski.org/Book/){: target="_blank" rel="noopener noreferrer"}" by Richard Szeliski + - "[Foundations of Computer Vision](https://mitpress.mit.edu/9780262048972/foundations-of-computer-vision/){: target="_blank" rel="noopener noreferrer"}" by Antonio Torralba, Phillip Isola, and William T. Freeman + ## Lectures Lectures will take place in-person. -In-person lectures will be held on **Tuesdays and Thursdays from 3:00-4:30 PM EST in room 906 COOLG**. +In-person lectures will be held on **Mondays and Wednesdays from 12:00-1:30 PM ET in room CSRB 2246**. ## Discussion Sections -Discussions will take place in-person. +Discussions will take place in-person. -In-person discussions will be held on **Wednesdays from 3:30-5:30 PM EST in room 1311 EECS**. +In-person discussions will be held on **Tuesdays from 3:30-5:30 PM ET in room CSRB 2246**. -## Programming Projects +## Programming Projects (Individual) -You will complete 5 programming [projects](/projects/) over the course of the semester. All projects will be implemented using Python, Pytorch and Google Colab. +You will complete 5 individual programming [projects]({{ site.baseurl }}/projects/) over the course of the semester. All projects will be implemented using Python, Pytorch and Google Colab. -## Final Project +## Final Project (Group) -Instead of a final exam at the end of the semester, you will complete a [final project](/projects/finalproject/) working in groups of 1 to 3 students. +Instead of a final exam at the end of the semester, you will complete a final project working in groups of 1 to 3 students. The final project will entail five core deliverables: (1) a written paper review, (2) an in-class paper presentation, (3) reproducing the published results of an existing deep learning paper, (4) extending the chosen paper's methods and (5) documenting your reproduction and extension in a written report. The objective of the final project is for you to gain experience with state of the art approaches in deep learning and a sense of how research in the area is conducted. -## Quizzes +## Midterm Exam + +There will be a midterm exam that takes place in-class during the semester. -Throughout the semester, there will be a total of 16 quizzes administered through [gradescope](https://www.gradescope.com/courses/480760){: target="_blank" rel="noopener noreferrer"}. These quizzes will be posted before lecture sections throughout the semester and be available to take until the beginning of lecture that same day. Quizzes will be released at 7:00AM EST and must be submitted by 3:00PM EST. Each quiz will have a 15 minute time limit. Each quiz will consist of 1 or 2 short questions within the scope of previously covered lectures and graded projects. Use of lecture, project and other course materials is permitted while taking the quizzes. Use of external sources (i.e. from the internet) is not permitted during quizzes. 
## Grading Policy Course grades will be determined according to the following criteria: - - [Project 0](/projects/project0/): 12% - - [Project 1](/projects/project1/): 12% - - [Project 2](/projects/project2/): 12% - - [Project 3](/projects/project3/): 12% - - [Project 4](/projects/project4/): 12% - - [Final Project](/projects/finalproject/): - - Paper Review: 3% - - Paper Presentation: 3% - - Paper Reproduction: 6% - - Algorithmic Extension: 6% - - Written Report: 6% + - [Project 0]({{ site.baseurl }}/projects/project0/): 5% + - [Project 1]({{ site.baseurl }}/projects/project1/): 11% + - [Project 2]({{ site.baseurl }}/projects/project2/): 12% + - [Project 3]({{ site.baseurl }}/projects/project3/): 12% + - [Project 4]({{ site.baseurl }}/projects/project4/): 12% + - Midterm Exam: 10% + - [Final Project (group)]({{ site.baseurl }}/projects/finalproject/): 23% + - In-class Activities: 10% + - Participation: 5% + +## Assignment Turn-in and Late Submission Policy + +Projects reports are due at 11:59pm ET on their corresponding due date and should be submitted electronically to the Autograder or Canvas (including a photo of the signature page), respectively. For individual student projects, **three (3)** late tokens throughout the semester (1 late token corresponds to 24 hrs late, with no penalty). After all late tokens have been used, a late penalty of 10% of the total project grade per day will be applied. + +## Regrades + +Requests to regrade any graded assignment or project must be submitted in writing no later than one week following the return of the assignment/exam. It is important that requests be self-contained in writing such that you can carefully enunciate any errors or issues in grading. Well-formed and valid regrade requests will allow the course staff to properly address and correct any mistakes. - - 16 Pre-Lecture Quizzes: 16% (1% each) ## Collaboration Policy -The free flow of discussion and ideas is encouraged. However, all work submitted must be your own. +We will mark clearly whether an assignment is for an individual or a group. For **individual** assignments, you must submit your own assignment. For **group** assignments, each team must turn in work that is wholly their own: teams are encouraged to discuss problems, strategies, ideas, algorithms, etc. with other teams, but their write-ups (including software) must be done independently. Usually, students in the same group receive the same grades for a group project. However, if a certain person(s) did not contribute or participate as reflected in in-class participation, peer review checks and lab performance, etc., their grades will be reduced at the discretion of the instructional team based on their performance. + +Members of a team are required to work together on the problems. Further, each team member is expected to understand all aspects of the team project, its implementation, and underlying concepts. Proper team planning will require, at a minimum, one in-person meeting. With each problem's solution being the product of the group, all members will be held accountable for violations of the honor code. All code submitted must comply with the [College of Engineering Honor Code](https://bulletin.engin.umich.edu/rules/){: target="_blank" rel="noopener noreferrer"}. -No code can be communicated, including verbally. Explicit use of external sources must be clearly cited. 
+On every group-based lab assignment and project, students must include a **signed** statement below, including the following statements: + + - "I participated and contributed to team discussions on each problem, and I attest to the integrity of each solution. Our team met as a group on [DATE(S)]. " + - β€œContribution of Authors: [Team member A] did [Task XXX]; [Team members B and C] did [Task YYY]; [Team members A, B and C] did [ZZZ]. [All authors] \[gave feedback on the software development, contributed to writing the report/making the demo presentation, and approved the final version for submission.\]” (*Modify the texts in brackets according to your specific team situation and member contribution. Ideally, each member/subset of members contributed to something unique, and all authors contributed to giving feedback and writing/making the final report/demo/presentations and approving the final version for submission.) + + +An example of a reasonable scenario might be: "Bob was out of town when we met, but Alice and Carol were able to meet on [DATE]. Bob’s ideas were emailed to us and are reflected in our final solutions." We expect exceptions to be rare, but we understand that life can be complicated! The certification should be signed by each team member, and a scan of the certification attached to the submission. + +It is not acceptable to use code or solutions from outside class (including those found online) unless the resources are specifically suggested in the assignment statement. Non-permitted materials include (but not limited to) previous years' and other course materials (regardless of whether it originated from staff, students, etc.), the textbook's solution manual, etc. The use of any external material in the completion of course assignments and projects must be explicitly cited. + +## Generative AI Policy + +GenAI-use: **Permitted with certain rules (see below), and with disclosure.** + +Consequences for inappropriate use: Reported to school-based academic misconduct processes. + +Any and all use of machines that emulate human capabilities (U-M GPT, ChatGPT, Stable Diffusion, DALLE, etc. - hereinafter referred to as β€œGenAI” technology in general) to perform assignments or other works in the course should be **disclosed** (this includes all graded deliverables as well as other course works and activities). You must state which part of the assignment is from GenAI. + + - For programming assignments (Project 0 - Project 4) and midterm, it is not allowed to type in the project code template in GenAI and ask GenAI to fill in - **you must do the assignment (fill in the codes and answers) yourself**. + - For final projects, you may use GenAI to generating ideas, editing, translating, outlining, etc. as long as you **provide proper disclosure and acknowledge your GenAI use**, In your final report, you should specify (if any) which GenAI platform you used, which part is from GenAI results, what prompt you used, any tweaks/analysis to the results (e.g., you used GenAI to generate initial idea X but you modified Y and Z, etc. ) + +Our goal as a community of learners is to explore and understand how these tools may be used. + +U-M Generative AI website: [https://genai.umich.edu/](https://genai.umich.edu/){: target="_blank" rel="noopener noreferrer"} + ## Discussion Forum -The [Ed Stem](https://edstem.org/us/courses/31008/discussion/){: target="_blank" rel="noopener noreferrer"} discussion forum is available for discussion of course materials including lectures and projects. 
This forum will be shared across course offerings at the University of Michigan and the University of Minnesota. Students are not required to participate, use or join the Ed Stem forum. Students may opt-in to join the forum using this [Google Form](https://docs.google.com/forms/d/e/1FAIpQLSelLeqIUKBxQvqqp6LFs2fSYfzy9D_QCcvtXc302hnm6oF1EA/viewform?usp=sharing){: target="_blank" rel="noopener noreferrer"}. +The [Piazza](https://piazza.com/class/m4pgejar4ua2qf){: target="_blank" rel="noopener noreferrer"} discussion forum is available for discussion of course materials including lectures and projects. Students are not required to participate, use or join the Piazza forum. + +Any discussion of quizzes and verbatim code on the Piazza forum must be posted privately. + + + +## Course Policies + +**Academic Integrity:** All students in the class are presumed to be decent and honorable, and all students in the class are bound by the College of Engineering Honor Code. You may not seek to gain an unfair advantage over your fellow students; you may not consult, look at, or possess the unpublished work of another without their permission; and you must appropriately acknowledge your use of another’s work. + +**Accommodations for Students with Disabilities:** If you think you need an accommodation for a disability, please let us know at your earliest convenience so that we can work with the Services for Students with Disabilities (SSD) office to help us determine appropriate academic accommodations (734-763-3000; [http://ssd.umich.edu](http://ssd.umich.edu){: target="_blank" rel="noopener noreferrer"}). Any information you provide is private and confidential and will be treated as such. + +**Diversity Statement:** All members of this class are expected to contribute to a respectful, welcoming and inclusive environment for every other member of the class. We consider this classroom to be a place where you will be treated with respect, and we welcome individuals of all ages, backgrounds, beliefs, ethnicities, genders, gender identities, gender expressions, national origins, religious affiliations, sexual orientations, ability – and other visible and nonvisible differences. + +**Student Well-Being:** Students may experience stressors that can impact both their academic experience and their personal well-being. These may include academic pressure and challenges associated with relationships, mental health, alcohol or other drugs, identities, finances, etc. If you are experiencing concerns, seeking help is a courageous thing to do for yourself and those who care about you. If the source of your stressors is academic, please contact me so that we can find solutions together. For personal concerns, U-M offers many resources, some of which are listed at Resources for Student Well-being on the Well-being for U-M Students website: [https://wellbeing.studentlife.umich.edu/](https://wellbeing.studentlife.umich.edu/){: target="_blank" rel="noopener noreferrer"}. + +**Family Educational Rights and Privacy Act (FERPA):** Any in person course lectures may be audio/video recorded and made available to other students in this course. Students may not record or distribute any class activity without written permission from the instructor, except as necessary as part of approved accommodations for students with disabilities. Any approved recordings may only be used for the student’s own private use. -Any discussion of quizzes and verbatim code on the Ed Stem forum must be posted privately. 
diff --git a/tools/index.md b/tools/index.md new file mode 100644 index 0000000..8da15e3 --- /dev/null +++ b/tools/index.md @@ -0,0 +1,209 @@ +--- +layout: page +title: Tools +description: Collection of deep learning tools and frameworks. +nav_order: 6 +has_children: false +has_toc: true +--- + +# Deep Learning Tools and Frameworks +{:.no_toc} + +A collection of tools and projects that support deep learning applications to robotic tasks. Within each category below, the course staff provides a sample of tools that may be helpful for implementing course and research projects with deep learning. + +--- + +## Table of contents +{: .no_toc .text-delta } + +1. TOC +{:toc} + +--- + + + + +## Learning Frameworks + +Deep Learning frameworks speed-up model development and testing by providing users with optimized implementations of low-level functions (automatic differentiation, gradient descent, matrix operations, etc.) that can be used as building-blocks for robot applications. + + +
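As a small illustration of these building blocks, the snippet below uses PyTorch's automatic differentiation and a built-in optimizer to run gradient descent on a toy least-squares problem. It is only meant as a sketch of the low-level machinery the frameworks handle for you, not a robot-perception example.

```python
import torch

# A toy least-squares objective: find w minimizing ||X @ w - y||^2
X = torch.randn(100, 3)
y = torch.randn(100)
w = torch.zeros(3, requires_grad=True)

optimizer = torch.optim.SGD([w], lr=0.1)

for step in range(100):
    loss = ((X @ w - y) ** 2).mean()
    optimizer.zero_grad()
    loss.backward()      # automatic differentiation fills w.grad
    optimizer.step()     # gradient-descent update handled by the framework

print(loss.item())
```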
+Open-source Python-based deep learning frameworks have fostered a community and set of tools responsible for many of the core research and engineering developments powering deep learning. Here are some of the most popular Python-based frameworks:
+
+ - PyTorch (link to software website)
+ - JAX (link to software website)
+ - TensorFlow (link to software website)
+ - PaddlePaddle (link to software website)
+
+While Python-based learning frameworks typically offer C APIs, they have been built on and inspired by additional open-source projects, including:
+
+ - Darknet (link to software website)
+ - Caffe (link to software website)
+ - Caffe2 (link to software website)
+
+The more recent language, Julia, which you may be familiar with through the [ROB curriculum](https://robotics.umich.edu/academic-program/course-offerings/rob101/){: target="_blank" rel="noopener noreferrer"}, also offers deep learning support:
+
+ - Flux (link to software website)
+ - MLJ (link to software website)
+
+## Library Ecosystem
+
+A vast number of specialized software libraries have been built upon the popular open-source learning frameworks to provide data structures and algorithms that are customized to specific domains and use cases. Often, these provide optimized implementations that are useful for developers working within a specific sub-area of robot learning while also facilitating interactions and collaborations within the community. Contributing to these open-source projects is valuable and encouraged. Here is a sample that may be related to your interests:
+
+ - [PyTorch3D](https://pytorch3d.org/){: target="_blank" rel="noopener noreferrer"}: a specialized library for integrating 3D data (points, geometries, renderers, etc.) with deep learning in PyTorch
+
+ - [nerfstudio](https://docs.nerf.studio/){: target="_blank" rel="noopener noreferrer"}: for implementing, training, visualizing, and exporting your Neural Radiance Fields
+
+ - [Ray](https://docs.ray.io/en/latest/){: target="_blank" rel="noopener noreferrer"}: a library for scalable reinforcement learning
+
+ - [GPyTorch](https://gpytorch.ai/){: target="_blank" rel="noopener noreferrer"}: for implementing Gaussian Processes with GPU acceleration in PyTorch
+
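To give a flavor of what these domain-specific libraries add on top of a base framework, here is a small sketch using PyTorch3D's Chamfer-distance loss between two point clouds; the point counts are arbitrary, and the exact return values may vary slightly across PyTorch3D versions.

```python
import torch
from pytorch3d.loss import chamfer_distance

# Two batches of 3D point clouds, shape (batch, num_points, 3); random data for illustration.
points_a = torch.rand(1, 1024, 3)
points_b = torch.rand(1, 512, 3)

# A differentiable set-to-set distance that can be used directly as a training loss.
loss, _ = chamfer_distance(points_a, points_b)
print(loss.item())
```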
+## Data Annotation
+
+Much of deep learning ([but not all]({{ site.baseurl }}/papers/#self-supervised-learning)) relies on human-annotated data for training or evaluation. The data and annotations are typically expensive to obtain, which motivates the development and use of tools to make data annotation more efficient and cheaper. Here is a small sample of tools that exist to support your labeling efforts:
+
+ - [Voxel51](https://voxel51.com/fiftyone/){: target="_blank" rel="noopener noreferrer"}
+
+ - [LabelStudio](https://labelstud.io/){: target="_blank" rel="noopener noreferrer"}
+
+ - [CVAT: Computer Vision Annotation Tool](https://www.cvat.ai/){: target="_blank" rel="noopener noreferrer"}
+
+ - [3D BAT: Bounding Box Annotation Tool](https://github.com/walzimmer/3d-bat){: target="_blank" rel="noopener noreferrer"}
+
+ - [ProgressLabeller](https://progress.eecs.umich.edu/projects/progress-labeller/){: target="_blank" rel="noopener noreferrer"}
+
+ - [6D-PAT: Pose Annotation Tool](https://github.com/florianblume/6d-pat){: target="_blank" rel="noopener noreferrer"}
+
+## Simulation Environments
+
+Robots and sensors can be prohibitively expensive, so simulation environments have been created to enable large-scale training (e.g., [reinforcement learning]({{ site.baseurl }}/papers/#reinforcement-learning)) and data collection within robotics. Here are a few to get you started:
+
+ - [NVIDIA Isaac Sim](https://developer.nvidia.com/isaac-sim){: target="_blank" rel="noopener noreferrer"}
+
+ - [AI2THOR](https://ai2thor.allenai.org/){: target="_blank" rel="noopener noreferrer"}
+
+ - [PyBullet](https://pybullet.org/){: target="_blank" rel="noopener noreferrer"}
+
+ - [Gymnasium](https://gymnasium.farama.org/){: target="_blank" rel="noopener noreferrer"}
+
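Many of these simulators are driven through a reset/step loop; Gymnasium standardizes that interface, and a minimal random-agent sketch (assuming the `gymnasium` package and its bundled classic-control environments are installed) looks roughly like this:

```python
import gymnasium as gym

env = gym.make("CartPole-v1")
observation, info = env.reset(seed=0)

for _ in range(200):
    action = env.action_space.sample()  # a learned policy would choose the action here
    observation, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        observation, info = env.reset()

env.close()
```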
+## Visualization
+
+Understanding model architectures and outputs is crucial to validate and develop useful deep learning-based systems. Visualization tools can help support our understanding, especially for perception tasks, by depicting model predictions. Knowing which tools are useful for a given modality, and how to use them, is a valuable skill for deep learning practitioners. Here are some visualization tools that are broadly useful:
+
+ - [TensorBoard](https://www.tensorflow.org/tensorboard){: target="_blank" rel="noopener noreferrer"}: to understand model architectures and summary statistics measured during training
+
+ - [Open3D](http://www.open3d.org/){: target="_blank" rel="noopener noreferrer"}: for visualizing 3D data modalities
+
+ - [Plotly](https://plotly.com/python/){: target="_blank" rel="noopener noreferrer"}: for general-purpose analysis and plotting
+
+ - [RViz](http://wiki.ros.org/rviz){: target="_blank" rel="noopener noreferrer"}: for integrating visualization with ROS topics
+
+## Model Formats
+
+Open-source model formats describe learning-based architectures and models in order to enable sharing and deploying the models independent of the software and hardware used during model training and development. These are especially useful for deployment, where minimizing the latency overhead of general-purpose learning frameworks can be crucial (see the export sketch below).
+
+ - [ONNX: Open Neural Network Exchange](https://onnx.ai/){: target="_blank" rel="noopener noreferrer"}
+
+ - [NNEF: Neural Network Exchange Format](https://www.khronos.org/nnef/){: target="_blank" rel="noopener noreferrer"}
+
+## Misc
+
+ - [distill.pub](https://distill.pub/guide/){: target="_blank" rel="noopener noreferrer"}: for organizing your publications in an elegant and shareable format
+
+ - [ConvNetJS](https://cs.stanford.edu/people/karpathy/convnetjs/){: target="_blank" rel="noopener noreferrer"}: a minimal JavaScript library for neural network training and inference within web browsers
+
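Returning to the model formats above: a common workflow is to train a network in a general-purpose framework and then export it to an exchange format for deployment. Here is a minimal sketch of exporting a small PyTorch model to ONNX; the stand-in network, input shape, and file name are placeholders for the example.

```python
import torch

# A tiny stand-in network; in practice this would be your trained model.
model = torch.nn.Sequential(
    torch.nn.Linear(4, 16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 2),
).eval()

# An example input fixes the expected input shape for the exported graph.
dummy_input = torch.randn(1, 4)
torch.onnx.export(model, dummy_input, "model.onnx")  # writes an ONNX graph to disk

# The resulting file can then be loaded by an ONNX-compatible runtime for inference.
```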