diff --git a/src/year2/machine-learning-for-computer-vision/img/2order_1step.jpg b/src/year2/machine-learning-for-computer-vision/img/2order_1step.jpg new file mode 100644 index 0000000..83bd263 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/2order_1step.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_2order_optimizer.jpg b/src/year2/machine-learning-for-computer-vision/img/_2order_optimizer.jpg new file mode 100644 index 0000000..dea8b74 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_2order_optimizer.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_adaboost_example1.jpg b/src/year2/machine-learning-for-computer-vision/img/_adaboost_example1.jpg new file mode 100644 index 0000000..0298857 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_adaboost_example1.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_adaboost_example2.jpg b/src/year2/machine-learning-for-computer-vision/img/_adaboost_example2.jpg new file mode 100644 index 0000000..a196c9e Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_adaboost_example2.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_adaboost_example3.jpg b/src/year2/machine-learning-for-computer-vision/img/_adaboost_example3.jpg new file mode 100644 index 0000000..d98997c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_adaboost_example3.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_adaboost_example4.jpg b/src/year2/machine-learning-for-computer-vision/img/_adaboost_example4.jpg new file mode 100644 index 0000000..55c78cb Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_adaboost_example4.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_arcface_cluster.jpg b/src/year2/machine-learning-for-computer-vision/img/_arcface_cluster.jpg new file mode 100644 index 0000000..c26a2c3 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_arcface_cluster.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_arcface_flow.jpg b/src/year2/machine-learning-for-computer-vision/img/_arcface_flow.jpg new file mode 100644 index 0000000..68a6e93 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_arcface_flow.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_arcface_penalty.jpg b/src/year2/machine-learning-for-computer-vision/img/_arcface_penalty.jpg new file mode 100644 index 0000000..e045892 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_arcface_penalty.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_arcface_softmax.jpg b/src/year2/machine-learning-for-computer-vision/img/_arcface_softmax.jpg new file mode 100644 index 0000000..4721af7 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_arcface_softmax.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_centernet_other_tasks.jpg b/src/year2/machine-learning-for-computer-vision/img/_centernet_other_tasks.jpg new file mode 100644 index 0000000..333ebe4 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_centernet_other_tasks.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_clip_generation_conditioning.jpg b/src/year2/machine-learning-for-computer-vision/img/_clip_generation_conditioning.jpg new file mode 100644 index 0000000..e40a444 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_clip_generation_conditioning.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_clip_inference.jpg b/src/year2/machine-learning-for-computer-vision/img/_clip_inference.jpg new file mode 100644 index 0000000..6770872 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_clip_inference.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_clip_resnet_distributional_shift.jpg b/src/year2/machine-learning-for-computer-vision/img/_clip_resnet_distributional_shift.jpg new file mode 100644 index 0000000..dde550b Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_clip_resnet_distributional_shift.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_clip_resnet_distributional_shift_datasets.jpg b/src/year2/machine-learning-for-computer-vision/img/_clip_resnet_distributional_shift_datasets.jpg new file mode 100644 index 0000000..429cd3b Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_clip_resnet_distributional_shift_datasets.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_clip_training.jpg b/src/year2/machine-learning-for-computer-vision/img/_clip_training.jpg new file mode 100644 index 0000000..0ffe146 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_clip_training.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_cnn_knn_face_recognition.jpg b/src/year2/machine-learning-for-computer-vision/img/_cnn_knn_face_recognition.jpg new file mode 100644 index 0000000..87115ac Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_cnn_knn_face_recognition.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_cnn_object_localization.jpg b/src/year2/machine-learning-for-computer-vision/img/_cnn_object_localization.jpg new file mode 100644 index 0000000..afc41d3 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_cnn_object_localization.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_cnn_pyramid_multi_scale.jpg b/src/year2/machine-learning-for-computer-vision/img/_cnn_pyramid_multi_scale.jpg new file mode 100644 index 0000000..8677b2e Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_cnn_pyramid_multi_scale.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_cross_attention.jpg b/src/year2/machine-learning-for-computer-vision/img/_cross_attention.jpg new file mode 100644 index 0000000..e09fb36 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_cross_attention.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_darknet.jpg b/src/year2/machine-learning-for-computer-vision/img/_darknet.jpg new file mode 100644 index 0000000..b9d15b7 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_darknet.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_deeplabv3.jpg b/src/year2/machine-learning-for-computer-vision/img/_deeplabv3.jpg new file mode 100644 index 0000000..abcd5f5 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_deeplabv3.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_deeplabv3_aspp.jpg b/src/year2/machine-learning-for-computer-vision/img/_deeplabv3_aspp.jpg new file mode 100644 index 0000000..5d9914c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_deeplabv3_aspp.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_deeplabv3plus_1.jpg b/src/year2/machine-learning-for-computer-vision/img/_deeplabv3plus_1.jpg new file mode 100644 index 0000000..c25ed29 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_deeplabv3plus_1.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_deeplabv3plus_2.jpg b/src/year2/machine-learning-for-computer-vision/img/_deeplabv3plus_2.jpg new file mode 100644 index 0000000..7733f9a Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_deeplabv3plus_2.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_deeplabv3plus_3.jpg b/src/year2/machine-learning-for-computer-vision/img/_deeplabv3plus_3.jpg new file mode 100644 index 0000000..9881521 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_deeplabv3plus_3.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_depth_comparison_features.jpg b/src/year2/machine-learning-for-computer-vision/img/_depth_comparison_features.jpg new file mode 100644 index 0000000..456cb55 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_depth_comparison_features.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_depth_invariant_offset.jpg b/src/year2/machine-learning-for-computer-vision/img/_depth_invariant_offset.jpg new file mode 100644 index 0000000..3f713cb Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_depth_invariant_offset.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_depthwise_conv.jpg b/src/year2/machine-learning-for-computer-vision/img/_depthwise_conv.jpg new file mode 100644 index 0000000..0b224c3 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_depthwise_conv.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_detr_architecture.jpg b/src/year2/machine-learning-for-computer-vision/img/_detr_architecture.jpg new file mode 100644 index 0000000..962f20c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_detr_architecture.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_dilated_conv_weights.jpg b/src/year2/machine-learning-for-computer-vision/img/_dilated_conv_weights.jpg new file mode 100644 index 0000000..30214df Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_dilated_conv_weights.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_dilated_convolution.jpg b/src/year2/machine-learning-for-computer-vision/img/_dilated_convolution.jpg new file mode 100644 index 0000000..df8448f Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_dilated_convolution.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_dilated_convolution_exponential.jpg b/src/year2/machine-learning-for-computer-vision/img/_dilated_convolution_exponential.jpg new file mode 100644 index 0000000..848a744 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_dilated_convolution_exponential.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_dilated_resnet.jpg b/src/year2/machine-learning-for-computer-vision/img/_dilated_resnet.jpg new file mode 100644 index 0000000..6b593d1 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_dilated_resnet.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_dilated_resnet_stage1.jpg b/src/year2/machine-learning-for-computer-vision/img/_dilated_resnet_stage1.jpg new file mode 100644 index 0000000..caaa034 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_dilated_resnet_stage1.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_dilated_resnet_stage2.jpg b/src/year2/machine-learning-for-computer-vision/img/_dilated_resnet_stage2.jpg new file mode 100644 index 0000000..031e495 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_dilated_resnet_stage2.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_dot_product_attention.jpg b/src/year2/machine-learning-for-computer-vision/img/_dot_product_attention.jpg new file mode 100644 index 0000000..0c9b451 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_dot_product_attention.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_embedding_l2_norm_effect.jpg b/src/year2/machine-learning-for-computer-vision/img/_embedding_l2_norm_effect.jpg new file mode 100644 index 0000000..99796c5 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_embedding_l2_norm_effect.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_example_precision_recall_curve1.jpg b/src/year2/machine-learning-for-computer-vision/img/_example_precision_recall_curve1.jpg new file mode 100644 index 0000000..c97813f Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_example_precision_recall_curve1.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_example_precision_recall_curve2.jpg b/src/year2/machine-learning-for-computer-vision/img/_example_precision_recall_curve2.jpg new file mode 100644 index 0000000..6020f8f Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_example_precision_recall_curve2.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_fast_r_cnn.jpg b/src/year2/machine-learning-for-computer-vision/img/_fast_r_cnn.jpg new file mode 100644 index 0000000..2903bd6 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_fast_r_cnn.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_faster_r_cnn.jpg b/src/year2/machine-learning-for-computer-vision/img/_faster_r_cnn.jpg new file mode 100644 index 0000000..5ce12fd Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_faster_r_cnn.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_faster_r_cnn_fpn.jpg b/src/year2/machine-learning-for-computer-vision/img/_faster_r_cnn_fpn.jpg new file mode 100644 index 0000000..7f4ddcc Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_faster_r_cnn_fpn.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_fcn_16.jpg b/src/year2/machine-learning-for-computer-vision/img/_fcn_16.jpg new file mode 100644 index 0000000..4900839 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_fcn_16.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_fcn_32.jpg b/src/year2/machine-learning-for-computer-vision/img/_fcn_32.jpg new file mode 100644 index 0000000..675ef32 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_fcn_32.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_fcn_8.jpg b/src/year2/machine-learning-for-computer-vision/img/_fcn_8.jpg new file mode 100644 index 0000000..86a7ae3 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_fcn_8.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_focal_cdf_background.jpg b/src/year2/machine-learning-for-computer-vision/img/_focal_cdf_background.jpg new file mode 100644 index 0000000..960035f Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_focal_cdf_background.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_focal_cdf_foreground.jpg b/src/year2/machine-learning-for-computer-vision/img/_focal_cdf_foreground.jpg new file mode 100644 index 0000000..e99fd67 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_focal_cdf_foreground.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_focal_loss.jpg b/src/year2/machine-learning-for-computer-vision/img/_focal_loss.jpg new file mode 100644 index 0000000..fe7633a Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_focal_loss.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_fpn_flow.jpg b/src/year2/machine-learning-for-computer-vision/img/_fpn_flow.jpg new file mode 100644 index 0000000..6045986 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_fpn_flow.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_fpn_top_down.jpg b/src/year2/machine-learning-for-computer-vision/img/_fpn_top_down.jpg new file mode 100644 index 0000000..b7c700b Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_fpn_top_down.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_gan_flow.jpg b/src/year2/machine-learning-for-computer-vision/img/_gan_flow.jpg new file mode 100644 index 0000000..3966ce1 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_gan_flow.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_grouped_conv.jpg b/src/year2/machine-learning-for-computer-vision/img/_grouped_conv.jpg new file mode 100644 index 0000000..3e37d61 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_grouped_conv.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_haar_like_example.jpg b/src/year2/machine-learning-for-computer-vision/img/_haar_like_example.jpg new file mode 100644 index 0000000..78d2260 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_haar_like_example.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_haar_like_filters_example.jpg b/src/year2/machine-learning-for-computer-vision/img/_haar_like_filters_example.jpg new file mode 100644 index 0000000..a7bd67e Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_haar_like_filters_example.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_image_pyramid_multi_scale.jpg b/src/year2/machine-learning-for-computer-vision/img/_image_pyramid_multi_scale.jpg new file mode 100644 index 0000000..9b9e674 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_image_pyramid_multi_scale.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_inception_score.jpg b/src/year2/machine-learning-for-computer-vision/img/_inception_score.jpg new file mode 100644 index 0000000..1e92e09 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_inception_score.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_integral_image.jpg b/src/year2/machine-learning-for-computer-vision/img/_integral_image.jpg new file mode 100644 index 0000000..7c10e30 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_integral_image.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_integral_image_feature.jpg b/src/year2/machine-learning-for-computer-vision/img/_integral_image_feature.jpg new file mode 100644 index 0000000..b7404aa Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_integral_image_feature.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_integral_image_filters.jpg b/src/year2/machine-learning-for-computer-vision/img/_integral_image_filters.jpg new file mode 100644 index 0000000..a10c66c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_integral_image_filters.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_inverted_residual.jpg b/src/year2/machine-learning-for-computer-vision/img/_inverted_residual.jpg new file mode 100644 index 0000000..243fe1b Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_inverted_residual.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_mask2former.jpg b/src/year2/machine-learning-for-computer-vision/img/_mask2former.jpg new file mode 100644 index 0000000..bb8c326 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_mask2former.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_mask_rcnn.jpg b/src/year2/machine-learning-for-computer-vision/img/_mask_rcnn.jpg new file mode 100644 index 0000000..288b1fb Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_mask_rcnn.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_mask_rcnn_head.jpg b/src/year2/machine-learning-for-computer-vision/img/_mask_rcnn_head.jpg new file mode 100644 index 0000000..7c3c734 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_mask_rcnn_head.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_masked_self_attention.jpg b/src/year2/machine-learning-for-computer-vision/img/_masked_self_attention.jpg new file mode 100644 index 0000000..1ca2474 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_masked_self_attention.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_maskformer_decoder.jpg b/src/year2/machine-learning-for-computer-vision/img/_maskformer_decoder.jpg new file mode 100644 index 0000000..927e84a Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_maskformer_decoder.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_maskformer_inference.jpg b/src/year2/machine-learning-for-computer-vision/img/_maskformer_inference.jpg new file mode 100644 index 0000000..3ea3635 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_maskformer_inference.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_maskformer_naive.jpg b/src/year2/machine-learning-for-computer-vision/img/_maskformer_naive.jpg new file mode 100644 index 0000000..6b784f8 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_maskformer_naive.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_mnist_embeddings.jpg b/src/year2/machine-learning-for-computer-vision/img/_mnist_embeddings.jpg new file mode 100644 index 0000000..50da9b9 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_mnist_embeddings.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_model_scaling.jpg b/src/year2/machine-learning-for-computer-vision/img/_model_scaling.jpg new file mode 100644 index 0000000..4fbe92c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_model_scaling.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_monodepth_correct.jpg b/src/year2/machine-learning-for-computer-vision/img/_monodepth_correct.jpg new file mode 100644 index 0000000..4255483 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_monodepth_correct.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_monodepth_lr.jpg b/src/year2/machine-learning-for-computer-vision/img/_monodepth_lr.jpg new file mode 100644 index 0000000..5202957 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_monodepth_lr.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_monodepth_naive.jpg b/src/year2/machine-learning-for-computer-vision/img/_monodepth_naive.jpg new file mode 100644 index 0000000..c152f6a Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_monodepth_naive.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_monodepth_train_correct.jpg b/src/year2/machine-learning-for-computer-vision/img/_monodepth_train_correct.jpg new file mode 100644 index 0000000..7243a97 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_monodepth_train_correct.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_monodepth_train_naive.jpg b/src/year2/machine-learning-for-computer-vision/img/_monodepth_train_naive.jpg new file mode 100644 index 0000000..2e41d28 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_monodepth_train_naive.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_multi_head_attention.jpg b/src/year2/machine-learning-for-computer-vision/img/_multi_head_attention.jpg new file mode 100644 index 0000000..9852e8f Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_multi_head_attention.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_object_detection_example.jpg b/src/year2/machine-learning-for-computer-vision/img/_object_detection_example.jpg new file mode 100644 index 0000000..03366b5 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_object_detection_example.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_object_detection_map_speed_plot.jpg b/src/year2/machine-learning-for-computer-vision/img/_object_detection_map_speed_plot.jpg new file mode 100644 index 0000000..f157a9c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_object_detection_map_speed_plot.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_one_stage_detector.jpg b/src/year2/machine-learning-for-computer-vision/img/_one_stage_detector.jpg new file mode 100644 index 0000000..2fd55fe Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_one_stage_detector.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_panoptic_fpn.jpg b/src/year2/machine-learning-for-computer-vision/img/_panoptic_fpn.jpg new file mode 100644 index 0000000..55fa53b Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_panoptic_fpn.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_post_norm_encoder.jpg b/src/year2/machine-learning-for-computer-vision/img/_post_norm_encoder.jpg new file mode 100644 index 0000000..c5f981e Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_post_norm_encoder.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_pre_norm_encoder.jpg b/src/year2/machine-learning-for-computer-vision/img/_pre_norm_encoder.jpg new file mode 100644 index 0000000..8f51a63 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_pre_norm_encoder.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_progan.jpg b/src/year2/machine-learning-for-computer-vision/img/_progan.jpg new file mode 100644 index 0000000..f8f7b76 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_progan.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_progan_fadein.jpg b/src/year2/machine-learning-for-computer-vision/img/_progan_fadein.jpg new file mode 100644 index 0000000..c6de6d7 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_progan_fadein.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_r_cnn.jpg b/src/year2/machine-learning-for-computer-vision/img/_r_cnn.jpg new file mode 100644 index 0000000..a011f85 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_r_cnn.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_random_forest_bagging.jpg b/src/year2/machine-learning-for-computer-vision/img/_random_forest_bagging.jpg new file mode 100644 index 0000000..ea712ae Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_random_forest_bagging.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_random_forest_random_splitting.jpg b/src/year2/machine-learning-for-computer-vision/img/_random_forest_random_splitting.jpg new file mode 100644 index 0000000..8aa6683 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_random_forest_random_splitting.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_resnext_block.jpg b/src/year2/machine-learning-for-computer-vision/img/_resnext_block.jpg new file mode 100644 index 0000000..26681dc Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_resnext_block.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_resnext_to_resnet_l1.jpg b/src/year2/machine-learning-for-computer-vision/img/_resnext_to_resnet_l1.jpg new file mode 100644 index 0000000..647c350 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_resnext_to_resnet_l1.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_resnext_to_resnet_l2.jpg b/src/year2/machine-learning-for-computer-vision/img/_resnext_to_resnet_l2.jpg new file mode 100644 index 0000000..c4935a2 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_resnext_to_resnet_l2.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_resnext_to_resnet_l3.jpg b/src/year2/machine-learning-for-computer-vision/img/_resnext_to_resnet_l3.jpg new file mode 100644 index 0000000..c755e0e Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_resnext_to_resnet_l3.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_retinanet.jpg b/src/year2/machine-learning-for-computer-vision/img/_retinanet.jpg new file mode 100644 index 0000000..5debf67 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_retinanet.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_roi_align1.jpg b/src/year2/machine-learning-for-computer-vision/img/_roi_align1.jpg new file mode 100644 index 0000000..346245d Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_roi_align1.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_roi_align2.jpg b/src/year2/machine-learning-for-computer-vision/img/_roi_align2.jpg new file mode 100644 index 0000000..77e191d Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_roi_align2.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_roi_align3.jpg b/src/year2/machine-learning-for-computer-vision/img/_roi_align3.jpg new file mode 100644 index 0000000..2be270a Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_roi_align3.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_roipool_maxpool.jpg b/src/year2/machine-learning-for-computer-vision/img/_roipool_maxpool.jpg new file mode 100644 index 0000000..b5c6869 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_roipool_maxpool.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_roipool_snap.jpg b/src/year2/machine-learning-for-computer-vision/img/_roipool_snap.jpg new file mode 100644 index 0000000..7440e56 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_roipool_snap.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_rpn_anchor.jpg b/src/year2/machine-learning-for-computer-vision/img/_rpn_anchor.jpg new file mode 100644 index 0000000..99a0490 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_rpn_anchor.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_rpn_architecture.jpg b/src/year2/machine-learning-for-computer-vision/img/_rpn_architecture.jpg new file mode 100644 index 0000000..25d4827 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_rpn_architecture.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_scaled_dot_attention.jpg b/src/year2/machine-learning-for-computer-vision/img/_scaled_dot_attention.jpg new file mode 100644 index 0000000..2176fa2 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_scaled_dot_attention.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_scaled_dot_attention_multi_q.jpg b/src/year2/machine-learning-for-computer-vision/img/_scaled_dot_attention_multi_q.jpg new file mode 100644 index 0000000..03ceef6 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_scaled_dot_attention_multi_q.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_segmentation_rcnn.jpg b/src/year2/machine-learning-for-computer-vision/img/_segmentation_rcnn.jpg new file mode 100644 index 0000000..606efa3 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_segmentation_rcnn.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_self_attention.jpg b/src/year2/machine-learning-for-computer-vision/img/_self_attention.jpg new file mode 100644 index 0000000..4474ae1 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_self_attention.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_self_attention_permutation.jpg b/src/year2/machine-learning-for-computer-vision/img/_self_attention_permutation.jpg new file mode 100644 index 0000000..afec271 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_self_attention_permutation.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_sfmlearner.jpg b/src/year2/machine-learning-for-computer-vision/img/_sfmlearner.jpg new file mode 100644 index 0000000..f27b0ac Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_sfmlearner.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_siamese_network.jpg b/src/year2/machine-learning-for-computer-vision/img/_siamese_network.jpg new file mode 100644 index 0000000..d43cd92 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_siamese_network.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_spp.jpg b/src/year2/machine-learning-for-computer-vision/img/_spp.jpg new file mode 100644 index 0000000..9b8547b Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_spp.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_stereo_pipeline_naive.jpg b/src/year2/machine-learning-for-computer-vision/img/_stereo_pipeline_naive.jpg new file mode 100644 index 0000000..a8b3921 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_stereo_pipeline_naive.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_stereo_pipeline_reconstruction.jpg b/src/year2/machine-learning-for-computer-vision/img/_stereo_pipeline_reconstruction.jpg new file mode 100644 index 0000000..3fb064f Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_stereo_pipeline_reconstruction.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_stylegan.jpg b/src/year2/machine-learning-for-computer-vision/img/_stylegan.jpg new file mode 100644 index 0000000..d8c6862 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_stylegan.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_template_matching.jpg b/src/year2/machine-learning-for-computer-vision/img/_template_matching.jpg new file mode 100644 index 0000000..4a8e3cb Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_template_matching.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_transformer_autoregressive.jpg b/src/year2/machine-learning-for-computer-vision/img/_transformer_autoregressive.jpg new file mode 100644 index 0000000..c246580 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_transformer_autoregressive.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_transformer_decoder.jpg b/src/year2/machine-learning-for-computer-vision/img/_transformer_decoder.jpg new file mode 100644 index 0000000..9832386 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_transformer_decoder.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_transformer_embeddings.jpg b/src/year2/machine-learning-for-computer-vision/img/_transformer_embeddings.jpg new file mode 100644 index 0000000..595b6d8 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_transformer_embeddings.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_transformer_position_encoding.jpg b/src/year2/machine-learning-for-computer-vision/img/_transformer_position_encoding.jpg new file mode 100644 index 0000000..77639e3 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_transformer_position_encoding.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_transposed_convolution.jpg b/src/year2/machine-learning-for-computer-vision/img/_transposed_convolution.jpg new file mode 100644 index 0000000..4c98e70 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_transposed_convolution.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_triplet_loss.jpg b/src/year2/machine-learning-for-computer-vision/img/_triplet_loss.jpg new file mode 100644 index 0000000..f9be780 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_triplet_loss.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_unet.jpg b/src/year2/machine-learning-for-computer-vision/img/_unet.jpg new file mode 100644 index 0000000..a3f82e8 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_unet.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_viola_jones_cascade.jpg b/src/year2/machine-learning-for-computer-vision/img/_viola_jones_cascade.jpg new file mode 100644 index 0000000..625aa77 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_viola_jones_cascade.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_vision_transformer.jpg b/src/year2/machine-learning-for-computer-vision/img/_vision_transformer.jpg new file mode 100644 index 0000000..40cda88 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_vision_transformer.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_vit_embedding_similarity.jpg b/src/year2/machine-learning-for-computer-vision/img/_vit_embedding_similarity.jpg new file mode 100644 index 0000000..a76a233 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_vit_embedding_similarity.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_vit_head_distance.jpg b/src/year2/machine-learning-for-computer-vision/img/_vit_head_distance.jpg new file mode 100644 index 0000000..cd7f3a1 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_vit_head_distance.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_vit_patch.jpg b/src/year2/machine-learning-for-computer-vision/img/_vit_patch.jpg new file mode 100644 index 0000000..5e01efd Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_vit_patch.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_vit_projection_rgb.jpg b/src/year2/machine-learning-for-computer-vision/img/_vit_projection_rgb.jpg new file mode 100644 index 0000000..6c7982b Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_vit_projection_rgb.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/_vit_results.jpg b/src/year2/machine-learning-for-computer-vision/img/_vit_results.jpg new file mode 100644 index 0000000..9247e58 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/_vit_results.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/adagrad.jpg b/src/year2/machine-learning-for-computer-vision/img/adagrad.jpg new file mode 100644 index 0000000..19cce83 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/adagrad.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/adam.jpg b/src/year2/machine-learning-for-computer-vision/img/adam.jpg new file mode 100644 index 0000000..be1bd99 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/adam.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/adam_noisy.jpg b/src/year2/machine-learning-for-computer-vision/img/adam_noisy.jpg new file mode 100644 index 0000000..fbbc1f4 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/adam_noisy.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/adaptive_lr.jpg b/src/year2/machine-learning-for-computer-vision/img/adaptive_lr.jpg new file mode 100644 index 0000000..507292c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/adaptive_lr.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/aspp_deeplabv2.jpg b/src/year2/machine-learning-for-computer-vision/img/aspp_deeplabv2.jpg new file mode 100644 index 0000000..e26f59e Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/aspp_deeplabv2.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/bifpn.jpg b/src/year2/machine-learning-for-computer-vision/img/bifpn.jpg new file mode 100644 index 0000000..85b1a94 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/bifpn.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/centernet_outputs.jpg b/src/year2/machine-learning-for-computer-vision/img/centernet_outputs.jpg new file mode 100644 index 0000000..18f8659 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/centernet_outputs.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/compound_scaling.jpg b/src/year2/machine-learning-for-computer-vision/img/compound_scaling.jpg new file mode 100644 index 0000000..fb1a2b0 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/compound_scaling.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/dcgan.jpg b/src/year2/machine-learning-for-computer-vision/img/dcgan.jpg new file mode 100644 index 0000000..aad1561 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/dcgan.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/deepid2.jpg b/src/year2/machine-learning-for-computer-vision/img/deepid2.jpg new file mode 100644 index 0000000..ee232b8 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/deepid2.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/detr_decoder.jpg b/src/year2/machine-learning-for-computer-vision/img/detr_decoder.jpg new file mode 100644 index 0000000..416210c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/detr_decoder.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/detr_encoder.jpg b/src/year2/machine-learning-for-computer-vision/img/detr_encoder.jpg new file mode 100644 index 0000000..a6ecb79 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/detr_encoder.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/detr_object_query.jpg b/src/year2/machine-learning-for-computer-vision/img/detr_object_query.jpg new file mode 100644 index 0000000..3862b25 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/detr_object_query.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/earth_mover.jpg b/src/year2/machine-learning-for-computer-vision/img/earth_mover.jpg new file mode 100644 index 0000000..31ca3ab Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/earth_mover.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/earth_mover_plan.jpg b/src/year2/machine-learning-for-computer-vision/img/earth_mover_plan.jpg new file mode 100644 index 0000000..eb8551a Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/earth_mover_plan.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/edf.jpg b/src/year2/machine-learning-for-computer-vision/img/edf.jpg new file mode 100644 index 0000000..c42503d Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/edf.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/efficientnet_scaling.jpg b/src/year2/machine-learning-for-computer-vision/img/efficientnet_scaling.jpg new file mode 100644 index 0000000..ed4a446 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/efficientnet_scaling.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/entropy.jpg b/src/year2/machine-learning-for-computer-vision/img/entropy.jpg new file mode 100644 index 0000000..0bd2312 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/entropy.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/gan_disjoint.jpg b/src/year2/machine-learning-for-computer-vision/img/gan_disjoint.jpg new file mode 100644 index 0000000..e38d14d Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/gan_disjoint.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/gan_latent_interpolation.jpg b/src/year2/machine-learning-for-computer-vision/img/gan_latent_interpolation.jpg new file mode 100644 index 0000000..2d02564 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/gan_latent_interpolation.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/generative_task.jpg b/src/year2/machine-learning-for-computer-vision/img/generative_task.jpg new file mode 100644 index 0000000..af1ab8c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/generative_task.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/hungarian_loss.jpg b/src/year2/machine-learning-for-computer-vision/img/hungarian_loss.jpg new file mode 100644 index 0000000..2de5768 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/hungarian_loss.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/image_manifold.jpg b/src/year2/machine-learning-for-computer-vision/img/image_manifold.jpg new file mode 100644 index 0000000..00e2941 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/image_manifold.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/latent_for_generation.jpg b/src/year2/machine-learning-for-computer-vision/img/latent_for_generation.jpg new file mode 100644 index 0000000..7136a5a Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/latent_for_generation.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/manifold_precision_recall.jpg b/src/year2/machine-learning-for-computer-vision/img/manifold_precision_recall.jpg new file mode 100644 index 0000000..4ad6f64 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/manifold_precision_recall.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/momentum.jpg b/src/year2/machine-learning-for-computer-vision/img/momentum.jpg new file mode 100644 index 0000000..9ee0dac Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/momentum.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/momentum_local_global.jpg b/src/year2/machine-learning-for-computer-vision/img/momentum_local_global.jpg new file mode 100644 index 0000000..f11286b Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/momentum_local_global.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/monodepth_lr_results.jpg b/src/year2/machine-learning-for-computer-vision/img/monodepth_lr_results.jpg new file mode 100644 index 0000000..e0c6cfd Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/monodepth_lr_results.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/monodepth_no_lr_results.jpg b/src/year2/machine-learning-for-computer-vision/img/monodepth_no_lr_results.jpg new file mode 100644 index 0000000..38c8bd8 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/monodepth_no_lr_results.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/motion_data.jpg b/src/year2/machine-learning-for-computer-vision/img/motion_data.jpg new file mode 100644 index 0000000..173f3ea Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/motion_data.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/multiscale_comparison.jpg b/src/year2/machine-learning-for-computer-vision/img/multiscale_comparison.jpg new file mode 100644 index 0000000..c297889 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/multiscale_comparison.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/nesterov_comparison.jpg b/src/year2/machine-learning-for-computer-vision/img/nesterov_comparison.jpg new file mode 100644 index 0000000..37a15ec Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/nesterov_comparison.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/nesterov_momentum.jpg b/src/year2/machine-learning-for-computer-vision/img/nesterov_momentum.jpg new file mode 100644 index 0000000..79553a6 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/nesterov_momentum.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/neural_architecture_search.jpg b/src/year2/machine-learning-for-computer-vision/img/neural_architecture_search.jpg new file mode 100644 index 0000000..444e94a Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/neural_architecture_search.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/norm_methods.jpg b/src/year2/machine-learning-for-computer-vision/img/norm_methods.jpg new file mode 100644 index 0000000..a58cc2e Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/norm_methods.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/obj_det_recall_precision.jpg b/src/year2/machine-learning-for-computer-vision/img/obj_det_recall_precision.jpg new file mode 100644 index 0000000..1f549f1 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/obj_det_recall_precision.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/obj_detection_and_segmentation.jpg b/src/year2/machine-learning-for-computer-vision/img/obj_detection_and_segmentation.jpg new file mode 100644 index 0000000..7894055 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/obj_detection_and_segmentation.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/optimizers_no_align.jpg b/src/year2/machine-learning-for-computer-vision/img/optimizers_no_align.jpg new file mode 100644 index 0000000..0bb8f88 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/optimizers_no_align.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/regnet.jpg b/src/year2/machine-learning-for-computer-vision/img/regnet.jpg new file mode 100644 index 0000000..ae70e02 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/regnet.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/rmsprop.jpg b/src/year2/machine-learning-for-computer-vision/img/rmsprop.jpg new file mode 100644 index 0000000..112fe4d Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/rmsprop.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/se_resnet.jpg b/src/year2/machine-learning-for-computer-vision/img/se_resnet.jpg new file mode 100644 index 0000000..97f631c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/se_resnet.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/segmentation_types.jpg b/src/year2/machine-learning-for-computer-vision/img/segmentation_types.jpg new file mode 100644 index 0000000..5bdf1ef Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/segmentation_types.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/selective_search.jpg b/src/year2/machine-learning-for-computer-vision/img/selective_search.jpg new file mode 100644 index 0000000..e94416b Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/selective_search.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/sgd_canyon.jpg b/src/year2/machine-learning-for-computer-vision/img/sgd_canyon.jpg new file mode 100644 index 0000000..f1a9720 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/sgd_canyon.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/sgd_local_minima.jpg b/src/year2/machine-learning-for-computer-vision/img/sgd_local_minima.jpg new file mode 100644 index 0000000..a4c6f5c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/sgd_local_minima.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/sgd_sphere.jpg b/src/year2/machine-learning-for-computer-vision/img/sgd_sphere.jpg new file mode 100644 index 0000000..a058e72 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/sgd_sphere.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/single_model_scaling.jpg b/src/year2/machine-learning-for-computer-vision/img/single_model_scaling.jpg new file mode 100644 index 0000000..7e543a6 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/single_model_scaling.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/stereo_correspondence.jpg b/src/year2/machine-learning-for-computer-vision/img/stereo_correspondence.jpg new file mode 100644 index 0000000..e58887c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/stereo_correspondence.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/traditional_attention.jpg b/src/year2/machine-learning-for-computer-vision/img/traditional_attention.jpg new file mode 100644 index 0000000..68c05e9 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/traditional_attention.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/transformer.jpg b/src/year2/machine-learning-for-computer-vision/img/transformer.jpg new file mode 100644 index 0000000..2fb6c11 Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/transformer.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/img/wide_resnet.jpg b/src/year2/machine-learning-for-computer-vision/img/wide_resnet.jpg new file mode 100644 index 0000000..b28857c Binary files /dev/null and b/src/year2/machine-learning-for-computer-vision/img/wide_resnet.jpg differ diff --git a/src/year2/machine-learning-for-computer-vision/sections/_architectures.tex b/src/year2/machine-learning-for-computer-vision/sections/_architectures.tex index ca9b35d..0f6e36c 100644 --- a/src/year2/machine-learning-for-computer-vision/sections/_architectures.tex +++ b/src/year2/machine-learning-for-computer-vision/sections/_architectures.tex @@ -176,7 +176,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_grouped_conv.pdf} + \includegraphics[width=0.7\linewidth]{./img/_grouped_conv.jpg} \end{figure} By processing the input in smaller chunks, there are the following gains: @@ -195,7 +195,7 @@ Network with bottleneck-block-inspired inception modules. Given the number of branches $G$ and the number of intermediate channels $d$, a ResNeXt block decomposes a bottleneck residual block into $G$ parallel branches that are summed out at the end. \begin{figure}[H] \centering - \includegraphics[width=0.35\linewidth]{./img/_resnext_block.pdf} + \includegraphics[width=0.35\linewidth]{./img/_resnext_block.jpg} \end{figure} \begin{remark} @@ -245,7 +245,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_resnext_to_resnet_l3.pdf} + \includegraphics[width=0.8\linewidth]{./img/_resnext_to_resnet_l3.jpg} \end{figure} \item[First $1 \times 1$ convolution] @@ -253,7 +253,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_resnext_to_resnet_l1.pdf} + \includegraphics[width=0.8\linewidth]{./img/_resnext_to_resnet_l1.jpg} \end{figure} \item[$3 \times 3$ convolution] @@ -261,7 +261,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.6\linewidth]{./img/_resnext_to_resnet_l2.pdf} + \includegraphics[width=0.6\linewidth]{./img/_resnext_to_resnet_l2.jpg} \end{figure} \end{descriptionlist} @@ -301,7 +301,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.4\linewidth]{./img/se_resnet.png} + \includegraphics[width=0.4\linewidth]{./img/se_resnet.jpg} \caption{SE-ResNet module} \end{figure} \end{description} @@ -331,7 +331,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/_depthwise_conv.pdf} + \includegraphics[width=0.45\linewidth]{./img/_depthwise_conv.jpg} \end{figure} \end{description} @@ -351,7 +351,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.4\linewidth]{./img/_inverted_residual.pdf} + \includegraphics[width=0.4\linewidth]{./img/_inverted_residual.jpg} \end{figure} \end{description} @@ -414,7 +414,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.85\linewidth]{./img/single_model_scaling.png} + \includegraphics[width=0.85\linewidth]{./img/single_model_scaling.jpg} \caption{\parbox[t]{0.7\linewidth}{Top-1 accuracy variation with width, depth, and resolution scaling on EfficientNet}} \end{figure} @@ -423,7 +423,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/compound_scaling.png} + \includegraphics[width=0.45\linewidth]{./img/compound_scaling.jpg} \caption{Width scaling for different fixed depths and resolutions} \end{figure} @@ -450,7 +450,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.95\linewidth]{./img/_model_scaling.pdf} + \includegraphics[width=0.95\linewidth]{./img/_model_scaling.jpg} \caption{Model scaling approaches} \end{figure} @@ -463,7 +463,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.5\linewidth]{./img/wide_resnet.png} + \includegraphics[width=0.5\linewidth]{./img/wide_resnet.jpg} \end{figure} \begin{remark} @@ -481,7 +481,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/neural_architecture_search.png} + \includegraphics[width=0.45\linewidth]{./img/neural_architecture_search.jpg} \end{figure} \begin{remark} @@ -494,7 +494,7 @@ Network with bottleneck-block-inspired inception modules. Scaling the baseline model (B0) allowed obtaining high accuracies with a controlled number of FLOPs. \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/efficientnet_scaling.png} + \includegraphics[width=0.45\linewidth]{./img/efficientnet_scaling.jpg} \end{figure} \end{description} @@ -521,7 +521,7 @@ Network with bottleneck-block-inspired inception modules. \begin{figure}[H] \centering - \includegraphics[width=0.95\linewidth]{./img/regnet.png} + \includegraphics[width=0.95\linewidth]{./img/regnet.jpg} \end{figure} In other words, RegNet defines a $16$-dimensional design space. To evaluate the architectures, the following is done: @@ -532,7 +532,7 @@ Network with bottleneck-block-inspired inception modules. \item Evaluate the design space by plotting $F$. \begin{figure}[H] \centering - \includegraphics[width=0.25\linewidth]{./img/edf.png} + \includegraphics[width=0.25\linewidth]{./img/edf.jpg} \caption{Example of cumulative distribution} \end{figure} diff --git a/src/year2/machine-learning-for-computer-vision/sections/_depth_estimation.tex b/src/year2/machine-learning-for-computer-vision/sections/_depth_estimation.tex index 659ad86..7836ab8 100644 --- a/src/year2/machine-learning-for-computer-vision/sections/_depth_estimation.tex +++ b/src/year2/machine-learning-for-computer-vision/sections/_depth_estimation.tex @@ -14,7 +14,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.65\linewidth]{./img/stereo_correspondence.png} + \includegraphics[width=0.65\linewidth]{./img/stereo_correspondence.jpg} \end{figure} \end{description} @@ -56,7 +56,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_stereo_pipeline_naive.pdf} + \includegraphics[width=0.7\linewidth]{./img/_stereo_pipeline_naive.jpg} \end{figure} \item[Reconstruction approach] @@ -64,7 +64,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/_stereo_pipeline_reconstruction.pdf} + \includegraphics[width=0.45\linewidth]{./img/_stereo_pipeline_reconstruction.jpg} \end{figure} \end{description} \end{description} @@ -79,7 +79,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_monodepth_naive.pdf} + \includegraphics[width=0.8\linewidth]{./img/_monodepth_naive.jpg} \caption{Naive training flow} \end{figure} @@ -97,7 +97,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.65\linewidth]{./img/_monodepth_train_naive.pdf} + \includegraphics[width=0.65\linewidth]{./img/_monodepth_train_naive.jpg} \caption{Backward reconstruction from the right image} \end{figure} \end{description} @@ -107,13 +107,13 @@ \begin{figure}[H] \centering - \includegraphics[width=0.65\linewidth]{./img/_monodepth_train_correct.pdf} + \includegraphics[width=0.65\linewidth]{./img/_monodepth_train_correct.jpg} \caption{Backward reconstruction from the left image} \end{figure} \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_monodepth_correct.pdf} + \includegraphics[width=0.7\linewidth]{./img/_monodepth_correct.jpg} \caption{Actual training flow} \end{figure} @@ -141,7 +141,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.9\linewidth]{./img/monodepth_no_lr_results.png} + \includegraphics[width=0.9\linewidth]{./img/monodepth_no_lr_results.jpg} \end{figure} \end{remark} @@ -170,7 +170,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_monodepth_lr.pdf} + \includegraphics[width=0.7\linewidth]{./img/_monodepth_lr.jpg} \end{figure} \item[Inference] @@ -189,7 +189,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/monodepth_lr_results.png} + \includegraphics[width=0.7\linewidth]{./img/monodepth_lr_results.jpg} \caption{Comparison of Monodepth with and without left-right processing} \end{figure} \end{description} @@ -212,7 +212,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.5\linewidth]{./img/_sfmlearner.pdf} + \includegraphics[width=0.5\linewidth]{./img/_sfmlearner.jpg} \caption{SfMLearner with two nearby images} \end{figure} \end{description} diff --git a/src/year2/machine-learning-for-computer-vision/sections/_generative_models.tex b/src/year2/machine-learning-for-computer-vision/sections/_generative_models.tex index abdd263..b42544d 100644 --- a/src/year2/machine-learning-for-computer-vision/sections/_generative_models.tex +++ b/src/year2/machine-learning-for-computer-vision/sections/_generative_models.tex @@ -8,7 +8,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.4\linewidth]{./img/generative_task.png} + \includegraphics[width=0.4\linewidth]{./img/generative_task.jpg} \end{figure} \begin{remark} @@ -16,7 +16,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.6\linewidth]{./img/image_manifold.png} + \includegraphics[width=0.6\linewidth]{./img/image_manifold.jpg} \end{figure} \end{remark} @@ -33,7 +33,7 @@ Model that takes as input a latent representation and maps it into an output image. \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/latent_for_generation.png} + \includegraphics[width=0.7\linewidth]{./img/latent_for_generation.jpg} \end{figure} \begin{remark} @@ -192,7 +192,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_inception_score.pdf} + \includegraphics[width=0.8\linewidth]{./img/_inception_score.jpg} \end{figure} \end{description} @@ -205,7 +205,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.4\linewidth]{./img/earth_mover.png} + \includegraphics[width=0.4\linewidth]{./img/earth_mover.jpg} \caption{ \parbox[t]{0.8\linewidth}{ Three cases of density functions distance. The distributions in the first case are closer than the second one. In the third case, they are mostly overlapping. @@ -228,7 +228,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.6\linewidth]{./img/earth_mover_plan.png} + \includegraphics[width=0.6\linewidth]{./img/earth_mover_plan.jpg} \end{figure} @@ -277,7 +277,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/manifold_precision_recall.png} + \includegraphics[width=0.8\linewidth]{./img/manifold_precision_recall.jpg} \end{figure} @@ -296,7 +296,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_gan_flow.pdf} + \includegraphics[width=0.8\linewidth]{./img/_gan_flow.jpg} \end{figure} @@ -405,7 +405,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.6\linewidth]{./img/dcgan.png} + \includegraphics[width=0.6\linewidth]{./img/dcgan.jpg} \end{figure} \end{description} @@ -414,7 +414,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.5\linewidth]{./img/gan_latent_interpolation.png} + \includegraphics[width=0.5\linewidth]{./img/gan_latent_interpolation.jpg} \end{figure} \end{remark} @@ -442,7 +442,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/gan_disjoint.png} + \includegraphics[width=0.7\linewidth]{./img/gan_disjoint.jpg} \end{figure} \indenttbox @@ -480,7 +480,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.65\linewidth]{./img/_progan.pdf} + \includegraphics[width=0.65\linewidth]{./img/_progan.jpg} \end{figure} \begin{description} @@ -494,7 +494,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_progan_fadein.pdf} + \includegraphics[width=0.7\linewidth]{./img/_progan_fadein.jpg} \caption{ \parbox[t]{0.7\linewidth}{ ProGAN fade-in. (a) is the starting resolution. (b) depicts the fade-in process. (c) represents the network at the end of the training process for this resolution (i.e., with $\alpha=1$) @@ -531,7 +531,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.4\linewidth]{./img/_stylegan.pdf} + \includegraphics[width=0.4\linewidth]{./img/_stylegan.jpg} \end{figure} \begin{remark} diff --git a/src/year2/machine-learning-for-computer-vision/sections/_metric_learning.tex b/src/year2/machine-learning-for-computer-vision/sections/_metric_learning.tex index 91937fa..7bfc963 100644 --- a/src/year2/machine-learning-for-computer-vision/sections/_metric_learning.tex +++ b/src/year2/machine-learning-for-computer-vision/sections/_metric_learning.tex @@ -47,7 +47,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_cnn_knn_face_recognition.pdf} + \includegraphics[width=0.7\linewidth]{./img/_cnn_knn_face_recognition.jpg} \end{figure} \begin{remark} @@ -57,7 +57,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/_mnist_embeddings.pdf} + \includegraphics[width=0.45\linewidth]{./img/_mnist_embeddings.jpg} \caption{MNIST embeddings in 2D} \end{figure} \end{remark} @@ -89,7 +89,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_siamese_network.pdf} + \includegraphics[width=0.7\linewidth]{./img/_siamese_network.jpg} \end{figure} \item[Contrastive loss] \marginnote{Contrastive loss} @@ -140,7 +140,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/deepid2.png} + \includegraphics[width=0.7\linewidth]{./img/deepid2.jpg} \caption{DeepID2 on a single crop} \end{figure} \end{description} @@ -183,7 +183,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/_triplet_loss.pdf} + \includegraphics[width=0.45\linewidth]{./img/_triplet_loss.jpg} \end{figure} \begin{remark} @@ -240,7 +240,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.6\linewidth]{./img/_embedding_l2_norm_effect.pdf} + \includegraphics[width=0.6\linewidth]{./img/_embedding_l2_norm_effect.jpg} \end{figure} \end{remark} @@ -253,7 +253,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.6\linewidth]{./img/_template_matching.pdf} + \includegraphics[width=0.6\linewidth]{./img/_template_matching.jpg} \end{figure} \end{remark} @@ -267,12 +267,12 @@ \centering \begin{subfigure}{0.48\linewidth} \centering - \includegraphics[width=0.5\linewidth]{./img/_arcface_softmax.pdf} + \includegraphics[width=0.5\linewidth]{./img/_arcface_softmax.jpg} \caption{Softmax} \end{subfigure} \begin{subfigure}{0.48\linewidth} \centering - \includegraphics[width=0.5\linewidth]{./img/_arcface_cluster.pdf} + \includegraphics[width=0.5\linewidth]{./img/_arcface_cluster.jpg} \caption{ArcFace} \end{subfigure} \caption{ @@ -288,13 +288,13 @@ \begin{figure}[H] \centering - \includegraphics[width=0.65\linewidth]{./img/_arcface_penalty.pdf} + \includegraphics[width=0.65\linewidth]{./img/_arcface_penalty.jpg} \caption{Penalty application with \texttt{Curie} as the correct class} \end{figure} \begin{figure}[H] \centering - \includegraphics[width=0.95\linewidth]{./img/_arcface_flow.png} + \includegraphics[width=0.95\linewidth]{./img/_arcface_flow.jpg} \caption{Overall ArcFace flow} \end{figure} \end{description} @@ -416,7 +416,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.6\linewidth]{./img/_clip_training.pdf} + \includegraphics[width=0.6\linewidth]{./img/_clip_training.jpg} \caption{ \parbox[t]{0.6\linewidth}{CLIP training flow. NT-Xent loss is applied column or row-wise in the dot product matrix.} } @@ -438,7 +438,7 @@ Given an image to classify, it is embedded and compared with the embeddings of prompts referencing the classes (e.g., \texttt{a photo of a [object]}). The closest one is considered as the predicted class. \begin{figure}[H] \centering - \includegraphics[width=0.85\linewidth]{./img/_clip_inference.pdf} + \includegraphics[width=0.85\linewidth]{./img/_clip_inference.jpg} \end{figure} \end{description} @@ -454,12 +454,12 @@ \centering \begin{subfigure}{0.35\linewidth} \centering - \includegraphics[width=\linewidth]{./img/_clip_resnet_distributional_shift.pdf} + \includegraphics[width=\linewidth]{./img/_clip_resnet_distributional_shift.jpg} \end{subfigure} \hfill \begin{subfigure}{0.6\linewidth} \centering - \includegraphics[width=\linewidth]{./img/_clip_resnet_distributional_shift_datasets.pdf} + \includegraphics[width=\linewidth]{./img/_clip_resnet_distributional_shift_datasets.jpg} \end{subfigure} \end{figure} \end{remark} @@ -476,6 +476,6 @@ \begin{figure}[H] \centering - \includegraphics[width=0.5\linewidth]{./img/_clip_generation_conditioning.pdf} + \includegraphics[width=0.5\linewidth]{./img/_clip_generation_conditioning.jpg} \end{figure} \end{remark} \ No newline at end of file diff --git a/src/year2/machine-learning-for-computer-vision/sections/_object_detection.tex b/src/year2/machine-learning-for-computer-vision/sections/_object_detection.tex index bea97ce..7d70e7c 100644 --- a/src/year2/machine-learning-for-computer-vision/sections/_object_detection.tex +++ b/src/year2/machine-learning-for-computer-vision/sections/_object_detection.tex @@ -12,7 +12,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/_object_detection_example.pdf} + \includegraphics[width=0.45\linewidth]{./img/_object_detection_example.jpg} \end{figure} \begin{remark} @@ -55,7 +55,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/obj_det_recall_precision.png} + \includegraphics[width=0.7\linewidth]{./img/obj_det_recall_precision.jpg} \caption{ Recall and precision in different scenarios } @@ -68,7 +68,7 @@ Consider the following image and the bounding boxes found by a detector: \begin{figure}[H] \centering - \includegraphics[width=0.4\linewidth]{./img/_example_precision_recall_curve1.pdf} + \includegraphics[width=0.4\linewidth]{./img/_example_precision_recall_curve1.jpg} \caption{ \parbox[t]{0.6\linewidth}{ Ground-truth (yellow boxes) and predictions (orange boxes) with their confidence score @@ -79,7 +79,7 @@ By sorting the confidence scores, it is possible to plot the precision-recall curve by varying the threshold $\rho_\text{min}$: \begin{figure}[H] \centering - \includegraphics[width=0.4\linewidth]{./img/_example_precision_recall_curve2.pdf} + \includegraphics[width=0.4\linewidth]{./img/_example_precision_recall_curve2.jpg} \end{figure} \indenttbox @@ -164,7 +164,7 @@ The training samples and their initial weights are the following: \begin{figure}[H] \centering - \includegraphics[width=0.3\linewidth]{./img/_adaboost_example1.pdf} + \includegraphics[width=0.3\linewidth]{./img/_adaboost_example1.jpg} \end{figure} We want to train an ensemble of $3$ decision stumps $\texttt{WL}_{j}$. @@ -176,7 +176,7 @@ The new reweighed and normalized samples are: \begin{figure}[H] \centering - \includegraphics[width=0.9\linewidth]{./img/_adaboost_example2.pdf} + \includegraphics[width=0.9\linewidth]{./img/_adaboost_example2.jpg} \end{figure} Now, assume that the second classifier learns $x_1 > 10$. The error rate and reweigh factor are: @@ -185,7 +185,7 @@ The new reweighed and normalized samples are: \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_adaboost_example3.pdf} + \includegraphics[width=0.7\linewidth]{./img/_adaboost_example3.jpg} \end{figure} Finally, the third classifier learns $x_2 > 20$. The error rate and reweigh factor are: @@ -220,13 +220,13 @@ \centering \begin{subfigure}{0.6\linewidth} \centering - \includegraphics[width=0.5\linewidth]{./img/_haar_like_example.pdf} + \includegraphics[width=0.5\linewidth]{./img/_haar_like_example.jpg} \caption{Filter applied on a patch} \end{subfigure} \hfill \begin{subfigure}{0.35\linewidth} \centering - \includegraphics[width=0.65\linewidth]{./img/_haar_like_filters_example.pdf} + \includegraphics[width=0.65\linewidth]{./img/_haar_like_filters_example.jpg} \caption{Other possible filters} \end{subfigure} \caption{Example of filters} @@ -247,7 +247,7 @@ In other words, the value at coordinates $(i, j)$ in the integral image is the sum of all the pixels of the original image in an area that starts from the top-left corner and has as bottom-right corner the pixel at $(i, j)$. \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/_integral_image.pdf} + \includegraphics[width=0.45\linewidth]{./img/_integral_image.jpg} \caption{Example of integral image} \end{figure} @@ -262,7 +262,7 @@ where $A$, $B$, $C$, and $D$ are coordinates defined as in \Cref{fig:integral_image_features}. \begin{figure}[H] \centering - \includegraphics[width=0.5\linewidth]{./img/_integral_image_feature.pdf} + \includegraphics[width=0.5\linewidth]{./img/_integral_image_feature.jpg} \caption{Summation of the pixels in the blue area} \label{fig:integral_image_features} \end{figure} @@ -290,7 +290,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_viola_jones_cascade.pdf} + \includegraphics[width=0.8\linewidth]{./img/_viola_jones_cascade.jpg} \end{figure} \end{description} @@ -341,7 +341,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.9\linewidth]{./img/_cnn_object_localization.pdf} + \includegraphics[width=0.9\linewidth]{./img/_cnn_object_localization.jpg} \caption{Localizer with AlexNet as feature extractor and 1000 classes} \end{figure} @@ -383,7 +383,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/selective_search.png} + \includegraphics[width=0.45\linewidth]{./img/selective_search.jpg} \caption{Example of some iterations of selective search} \end{figure} @@ -404,7 +404,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_r_cnn.pdf} + \includegraphics[width=0.8\linewidth]{./img/_r_cnn.jpg} \caption{Example of R-CNN using AlexNet} \end{figure} @@ -471,7 +471,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_fast_r_cnn.pdf} + \includegraphics[width=0.8\linewidth]{./img/_fast_r_cnn.jpg} \caption{Example of fast R-CNN using AlexNet} \end{figure} @@ -486,7 +486,7 @@ \end{remark} \begin{figure}[H] \raggedleft - \includegraphics[width=0.85\linewidth]{./img/_roipool_snap.pdf} + \includegraphics[width=0.85\linewidth]{./img/_roipool_snap.jpg} \caption{Project and snap operations} \end{figure} \item Apply max pooling with kernel of approximately size $\left\lceil \frac{H_r}{H_O} \right\rceil \times \left\lceil \frac{W_r}{W_O} \right\rceil$ and stride approximately $\left\lfloor \frac{H_r}{H_O} \right\rfloor \times \left\lfloor \frac{W_r}{W_O} \right\rfloor$. @@ -495,7 +495,7 @@ \end{remark} \begin{figure}[H] \raggedleft - \includegraphics[width=0.85\linewidth]{./img/_roipool_maxpool.pdf} + \includegraphics[width=0.85\linewidth]{./img/_roipool_maxpool.jpg} \caption{Pooling operation with varying kernel size} \end{figure} \end{enumerate} @@ -552,7 +552,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_faster_r_cnn.pdf} + \includegraphics[width=0.8\linewidth]{./img/_faster_r_cnn.jpg} \caption{Example of faster R-CNN using AlexNet} \end{figure} @@ -586,7 +586,7 @@ \begin{figure}[H] \raggedleft - \includegraphics[width=0.8\linewidth]{./img/_rpn_anchor.pdf} + \includegraphics[width=0.8\linewidth]{./img/_rpn_anchor.jpg} \caption{Example of an iteration of a 1-anchor RPN} \end{figure} @@ -606,7 +606,7 @@ \end{enumerate} \begin{figure}[H] \raggedleft - \includegraphics[width=0.7\linewidth]{./img/_rpn_architecture.pdf} + \includegraphics[width=0.7\linewidth]{./img/_rpn_architecture.jpg} \end{figure} \end{description} @@ -656,7 +656,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.6\linewidth]{./img/_image_pyramid_multi_scale.pdf} + \includegraphics[width=0.6\linewidth]{./img/_image_pyramid_multi_scale.jpg} \end{figure} \item[CNN pyramid multi-scale detection] \marginnote{CNN pyramid multi-scale detection} @@ -668,7 +668,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_cnn_pyramid_multi_scale.pdf} + \includegraphics[width=0.8\linewidth]{./img/_cnn_pyramid_multi_scale.jpg} \end{figure} \item[Feature pyramid network (FPN)] \marginnote{Feature pyramid network (FPN)} @@ -676,7 +676,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_fpn_flow.pdf} + \includegraphics[width=0.7\linewidth]{./img/_fpn_flow.jpg} \caption{General FPN flow} \end{figure} @@ -691,7 +691,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.65\linewidth]{./img/_fpn_top_down.pdf} + \includegraphics[width=0.65\linewidth]{./img/_fpn_top_down.jpg} \caption{FPN top-down flow} \end{figure} \end{description} @@ -701,7 +701,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_faster_r_cnn_fpn.pdf} + \includegraphics[width=0.8\linewidth]{./img/_faster_r_cnn_fpn.jpg} \caption{Example of faster R-CNN with FPN} \end{figure} @@ -733,7 +733,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.95\linewidth]{./img/_one_stage_detector.pdf} + \includegraphics[width=0.95\linewidth]{./img/_one_stage_detector.jpg} \end{figure} \item[Multi-label classification] \marginnote{Multi-label classification} @@ -763,7 +763,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_darknet.pdf} + \includegraphics[width=0.8\linewidth]{./img/_darknet.jpg} \end{figure} \item[Learned anchors] \marginnote{Learned anchors} @@ -798,7 +798,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_retinanet.pdf} + \includegraphics[width=0.8\linewidth]{./img/_retinanet.jpg} \end{figure} \begin{remark} @@ -827,7 +827,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/_focal_loss.pdf} + \includegraphics[width=0.45\linewidth]{./img/_focal_loss.jpg} \caption{Focal loss for varying $\gamma$} \end{figure} @@ -875,12 +875,12 @@ \centering \begin{subfigure}{0.49\linewidth} \centering - \includegraphics[width=0.85\linewidth]{./img/_focal_cdf_foreground.pdf} + \includegraphics[width=0.85\linewidth]{./img/_focal_cdf_foreground.jpg} \end{subfigure} \hfill \begin{subfigure}{0.49\linewidth} \centering - \includegraphics[width=0.85\linewidth]{./img/_focal_cdf_background.pdf} + \includegraphics[width=0.85\linewidth]{./img/_focal_cdf_background.jpg} \end{subfigure} \caption{ \parbox[t]{0.7\linewidth}{ @@ -940,7 +940,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.5\linewidth]{./img/centernet_outputs.png} + \includegraphics[width=0.5\linewidth]{./img/centernet_outputs.jpg} \end{figure} \begin{description} @@ -994,7 +994,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.55\linewidth]{./img/_centernet_other_tasks.png} + \includegraphics[width=0.55\linewidth]{./img/_centernet_other_tasks.jpg} \end{figure} \end{remark} @@ -1012,7 +1012,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.75\linewidth]{./img/_object_detection_map_speed_plot.pdf} + \includegraphics[width=0.75\linewidth]{./img/_object_detection_map_speed_plot.jpg} \caption{ mAP -- speed comparison of the various object detection approaches } @@ -1055,7 +1055,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.75\linewidth]{./img/_detr_architecture.pdf} + \includegraphics[width=0.75\linewidth]{./img/_detr_architecture.jpg} \end{figure} \item[Hungarian loss] \marginnote{Hungarian loss} @@ -1085,7 +1085,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.5\linewidth]{./img/hungarian_loss.png} + \includegraphics[width=0.5\linewidth]{./img/hungarian_loss.jpg} \caption{ Possible permutations and optimal permutation (in orange). } @@ -1102,7 +1102,7 @@ \item[Encoder] The encoder tend to solve a segmentation problem (i.e., determine what the object is). \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/detr_encoder.png} + \includegraphics[width=0.8\linewidth]{./img/detr_encoder.jpg} \caption{ \parbox[t]{0.75\linewidth}{Self-attention map of some pixels at the last encoder. Yellow tiles indicate that the analyzed pixel attends to that patch.} } @@ -1111,7 +1111,7 @@ \item[Decoder] The decoder tend to attend at object boundaries (i.e., determine where the object is). \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/detr_decoder.png} + \includegraphics[width=0.8\linewidth]{./img/detr_decoder.jpg} \caption{ \parbox[t]{0.75\linewidth}{Decoder attention. Highlighted areas have a higher attention weight.} } @@ -1120,7 +1120,7 @@ \item[Object query] Each object query tend to be specialized in recognizing objects in specific areas. \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/detr_object_query.png} + \includegraphics[width=0.8\linewidth]{./img/detr_object_query.jpg} \caption{ \parbox[t]{0.75\linewidth}{Position of the predictions of each object query. Green dots represent small boxes, red large horizontal boxes, and blue large vertical boxes.} } @@ -1161,7 +1161,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.65\linewidth]{./img/multiscale_comparison.png} + \includegraphics[width=0.65\linewidth]{./img/multiscale_comparison.jpg} \end{figure} \end{remark} @@ -1187,7 +1187,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.15\linewidth]{./img/bifpn.png} + \includegraphics[width=0.15\linewidth]{./img/bifpn.jpg} \end{figure} \end{itemize} \end{description} diff --git a/src/year2/machine-learning-for-computer-vision/sections/_optimizers.tex b/src/year2/machine-learning-for-computer-vision/sections/_optimizers.tex index 5676824..d078220 100644 --- a/src/year2/machine-learning-for-computer-vision/sections/_optimizers.tex +++ b/src/year2/machine-learning-for-computer-vision/sections/_optimizers.tex @@ -27,7 +27,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.35\linewidth]{./img/sgd_sphere.png} + \includegraphics[width=0.35\linewidth]{./img/sgd_sphere.jpg} \end{figure} \end{remark} @@ -38,7 +38,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/sgd_canyon.png} + \includegraphics[width=0.8\linewidth]{./img/sgd_canyon.jpg} \end{figure} \end{remark} @@ -47,7 +47,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.35\linewidth]{./img/sgd_local_minima.png} + \includegraphics[width=0.35\linewidth]{./img/sgd_local_minima.jpg} \end{figure} \end{remark} @@ -67,7 +67,7 @@ Methods that also consider the second-order derivatives when determining the ste \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_2order_optimizer.pdf} + \includegraphics[width=0.7\linewidth]{./img/_2order_optimizer.jpg} \end{figure} \end{description} @@ -75,7 +75,7 @@ Methods that also consider the second-order derivatives when determining the ste For quadratic functions, second-order methods converge in one step. \begin{figure}[H] \centering - \includegraphics[width=0.35\linewidth]{./img/2order_1step.png} + \includegraphics[width=0.35\linewidth]{./img/2order_1step.jpg} \end{figure} \end{remark} @@ -116,7 +116,7 @@ Methods that also consider the second-order derivatives when determining the ste \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/momentum.png} + \includegraphics[width=0.8\linewidth]{./img/momentum.jpg} \caption{ \parbox[t]{0.7\linewidth}{ Plain SGD vs momentum SGD in a sphere and a canyon. In both cases, momentum converges before SGD. @@ -143,13 +143,13 @@ Methods that also consider the second-order derivatives when determining the ste \begin{figure}[H] \centering - \includegraphics[width=0.35\linewidth]{./img/nesterov_momentum.png} + \includegraphics[width=0.35\linewidth]{./img/nesterov_momentum.jpg} \caption{Visualization of the step in Nesterov momentum} \end{figure} \begin{figure}[H] \centering - \includegraphics[width=0.75\linewidth]{./img/nesterov_comparison.png} + \includegraphics[width=0.75\linewidth]{./img/nesterov_comparison.jpg} \caption{Plain SGD vs standard momentum vs Nesterov momentum} \end{figure} \end{description} @@ -170,7 +170,7 @@ Methods that also consider the second-order derivatives when determining the ste \begin{figure}[H] \centering - \includegraphics[width=0.35\linewidth]{./img/adaptive_lr.png} + \includegraphics[width=0.35\linewidth]{./img/adaptive_lr.jpg} \caption{ \parbox[t]{0.5\linewidth}{Loss where the $w_1$ parameter has a larger gradient, while $w_2$ has a smaller gradient} } @@ -206,7 +206,7 @@ Methods that also consider the second-order derivatives when determining the ste \begin{figure}[H] \centering - \includegraphics[width=0.4\linewidth]{./img/adagrad.png} + \includegraphics[width=0.4\linewidth]{./img/adagrad.jpg} \caption{ \parbox[t]{0.45\linewidth}{SGD vs AdaGrad. AdaGrad stops before getting close to the minimum.} } @@ -233,7 +233,7 @@ Methods that also consider the second-order derivatives when determining the ste \begin{figure}[H] \centering - \includegraphics[width=0.35\linewidth]{./img/rmsprop.png} + \includegraphics[width=0.35\linewidth]{./img/rmsprop.jpg} \caption{SGD vs AdaGrad vs RMSProp} \end{figure} \end{description} @@ -271,13 +271,13 @@ Methods that also consider the second-order derivatives when determining the ste \begin{figure}[H] \centering - \includegraphics[width=0.75\linewidth]{./img/adam.png} + \includegraphics[width=0.75\linewidth]{./img/adam.jpg} \caption{SGD vs AdaGrad vs RMSProp vs Adam} \end{figure} \begin{figure}[H] \centering - \includegraphics[width=0.75\linewidth]{./img/adam_noisy.png} + \includegraphics[width=0.75\linewidth]{./img/adam_noisy.jpg} \caption{SGD vs AdaGrad vs RMSProp vs Adam with a smaller batch size} \end{figure} @@ -286,7 +286,7 @@ Methods that also consider the second-order derivatives when determining the ste \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/optimizers_no_align.png} + \includegraphics[width=0.8\linewidth]{./img/optimizers_no_align.jpg} \end{figure} \end{remark} \end{description} @@ -300,7 +300,7 @@ Methods that also consider the second-order derivatives when determining the ste \begin{figure}[H] \centering - \includegraphics[width=0.35\linewidth]{./img/momentum_local_global.png} + \includegraphics[width=0.35\linewidth]{./img/momentum_local_global.jpg} \end{figure} \end{remark} diff --git a/src/year2/machine-learning-for-computer-vision/sections/_segmentation.tex b/src/year2/machine-learning-for-computer-vision/sections/_segmentation.tex index 8607840..6f2a624 100644 --- a/src/year2/machine-learning-for-computer-vision/sections/_segmentation.tex +++ b/src/year2/machine-learning-for-computer-vision/sections/_segmentation.tex @@ -49,7 +49,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.75\linewidth]{./img/motion_data.png} + \includegraphics[width=0.75\linewidth]{./img/motion_data.jpg} \end{figure} \item[Depth comparison features] \marginnote{Depth comparison features} @@ -63,7 +63,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.6\linewidth]{./img/_depth_comparison_features.pdf} + \includegraphics[width=0.6\linewidth]{./img/_depth_comparison_features.jpg} \caption{Examples of feature computation} \end{figure} @@ -76,7 +76,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.6\linewidth]{./img/_depth_invariant_offset.pdf} + \includegraphics[width=0.6\linewidth]{./img/_depth_invariant_offset.jpg} \end{figure} \end{description} @@ -101,7 +101,7 @@ \begin{figure}[H] \raggedleft - \includegraphics[width=0.7\linewidth]{./img/_random_forest_bagging.pdf} + \includegraphics[width=0.7\linewidth]{./img/_random_forest_bagging.jpg} \end{figure} \item[Random splitting] \marginnote{Random splitting} @@ -111,7 +111,7 @@ \begin{figure}[H] \raggedleft - \includegraphics[width=0.7\linewidth]{./img/_random_forest_random_splitting.pdf} + \includegraphics[width=0.7\linewidth]{./img/_random_forest_random_splitting.jpg} \end{figure} \end{description} @@ -138,7 +138,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.9\linewidth]{./img/_segmentation_rcnn.pdf} + \includegraphics[width=0.9\linewidth]{./img/_segmentation_rcnn.jpg} \caption{R-CNN for segmentation with $20$ ($+1$) classes} \end{figure} \end{description} @@ -177,7 +177,7 @@ \begin{figure}[H] \raggedleft - \includegraphics[width=0.85\linewidth]{./img/_fcn_32.pdf} + \includegraphics[width=0.85\linewidth]{./img/_fcn_32.jpg} \end{figure} \item[FCN-16S] @@ -189,7 +189,7 @@ \begin{figure}[H] \raggedleft - \includegraphics[width=0.85\linewidth]{./img/_fcn_16.pdf} + \includegraphics[width=0.85\linewidth]{./img/_fcn_16.jpg} \end{figure} \item[FCN-8S] @@ -197,7 +197,7 @@ \begin{figure}[H] \raggedleft - \includegraphics[width=0.85\linewidth]{./img/_fcn_8.pdf} + \includegraphics[width=0.85\linewidth]{./img/_fcn_8.jpg} \end{figure} \end{descriptionlist} @@ -228,7 +228,7 @@ Consider images with $1$ channel. Given a $3 \times 3$ input image and a $3 \times 3$ transposed convolution kernel with stride $2$, the output activation has spatial dimension $5 \times 5$ and is obtained as follows: \begin{figure}[H] \centering - \includegraphics[width=0.9\linewidth]{./img/_transposed_convolution.pdf} + \includegraphics[width=0.9\linewidth]{./img/_transposed_convolution.jpg} \end{figure} \end{example} @@ -267,7 +267,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.6\linewidth]{./img/_unet.pdf} + \includegraphics[width=0.6\linewidth]{./img/_unet.jpg} \caption{ \parbox[t]{0.7\linewidth}{ U-Net structure. Note that in the original paper, convolutions have padding \texttt{valid} and the input is provided from a sliding window. Modern implementations have padding \texttt{same} and same input and output spatial dimension. @@ -306,7 +306,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_dilated_convolution.pdf} + \includegraphics[width=0.7\linewidth]{./img/_dilated_convolution.jpg} \caption{Example of $3 \times 3$ dilated convolutions with increasing dilation rate} \end{figure} @@ -320,7 +320,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.9\linewidth]{./img/_dilated_convolution_exponential.pdf} + \includegraphics[width=0.9\linewidth]{./img/_dilated_convolution_exponential.jpg} \end{figure} \end{remark} @@ -342,12 +342,12 @@ \centering \begin{subfigure}{0.45\linewidth} \centering - \includegraphics[width=0.9\linewidth]{./img/_dilated_resnet_stage1.pdf} + \includegraphics[width=0.9\linewidth]{./img/_dilated_resnet_stage1.jpg} \caption{ResNet with standard stages} \end{subfigure} \begin{subfigure}{0.45\linewidth} \centering - \includegraphics[width=0.9\linewidth]{./img/_dilated_resnet_stage2.pdf} + \includegraphics[width=0.9\linewidth]{./img/_dilated_resnet_stage2.jpg} \caption{ResNet with two dilated stages} \end{subfigure} \end{figure} @@ -355,7 +355,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.9\linewidth]{./img/_dilated_resnet.pdf} + \includegraphics[width=0.9\linewidth]{./img/_dilated_resnet.jpg} \caption{Dilated ResNet with total stride $8$} \end{figure} @@ -381,7 +381,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.95\linewidth]{./img/_deeplabv3.pdf} + \includegraphics[width=0.95\linewidth]{./img/_deeplabv3.jpg} \end{figure} \item[DeepLab v3+] \marginnote{DeepLab v3+} @@ -391,18 +391,18 @@ \centering \begin{subfigure}{0.3\linewidth} \centering - \includegraphics[width=0.9\linewidth]{./img/_deeplabv3plus_1.pdf} + \includegraphics[width=0.9\linewidth]{./img/_deeplabv3plus_1.jpg} \caption{DeepLab v3} \end{subfigure} \begin{subfigure}{0.3\linewidth} \centering - \includegraphics[width=0.9\linewidth]{./img/_deeplabv3plus_2.pdf} + \includegraphics[width=0.9\linewidth]{./img/_deeplabv3plus_2.jpg} \caption{U-Net} \end{subfigure} \hfill \begin{subfigure}{0.3\linewidth} \centering - \includegraphics[width=0.9\linewidth]{./img/_deeplabv3plus_3.pdf} + \includegraphics[width=0.9\linewidth]{./img/_deeplabv3plus_3.jpg} \caption{DeepLab v3+} \end{subfigure} \end{figure} @@ -418,7 +418,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/obj_detection_and_segmentation.png} + \includegraphics[width=0.7\linewidth]{./img/obj_detection_and_segmentation.jpg} \end{figure} \end{description} @@ -436,17 +436,17 @@ \item Divide the proposal into equal subregions without snapping to grid. \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_roi_align1.pdf} + \includegraphics[width=0.7\linewidth]{./img/_roi_align1.jpg} \end{figure} \item Sample some values following a regular grid within each subregion. Use bilinear interpolation to determine the values of the sampled points (as they are most likely not be pixel-perfect). \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_roi_align2.pdf} + \includegraphics[width=0.7\linewidth]{./img/_roi_align2.jpg} \end{figure} \item Max or average pool the sampled values in each subregion. \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_roi_align3.pdf} + \includegraphics[width=0.7\linewidth]{./img/_roi_align3.jpg} \end{figure} \end{enumerate} @@ -461,13 +461,13 @@ \begin{figure}[H] \centering - \includegraphics[width=0.85\linewidth]{./img/_mask_rcnn_head.pdf} + \includegraphics[width=0.85\linewidth]{./img/_mask_rcnn_head.jpg} \end{figure} \end{description} \begin{figure}[H] \centering - \includegraphics[width=0.85\linewidth]{./img/_mask_rcnn.pdf} + \includegraphics[width=0.85\linewidth]{./img/_mask_rcnn.jpg} \caption{Overall architecture of mask R-CNN} \end{figure} @@ -504,7 +504,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.5\linewidth]{./img/segmentation_types.png} + \includegraphics[width=0.5\linewidth]{./img/segmentation_types.jpg} \end{figure} \end{description} @@ -523,7 +523,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.85\linewidth]{./img/_panoptic_fpn.pdf} + \includegraphics[width=0.85\linewidth]{./img/_panoptic_fpn.jpg} \end{figure} \end{description} @@ -550,7 +550,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.75\linewidth]{./img/_maskformer_naive.pdf} + \includegraphics[width=0.75\linewidth]{./img/_maskformer_naive.jpg} \end{figure} \item[Architecture (pixel decoder)] @@ -563,7 +563,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.75\linewidth]{./img/_maskformer_decoder.pdf} + \includegraphics[width=0.75\linewidth]{./img/_maskformer_decoder.jpg} \end{figure} \item[Inference] @@ -584,7 +584,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_maskformer_inference.pdf} + \includegraphics[width=0.8\linewidth]{./img/_maskformer_inference.jpg} \end{figure} \end{description} @@ -607,7 +607,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/_mask2former.pdf} + \includegraphics[width=0.45\linewidth]{./img/_mask2former.jpg} \end{figure} \end{description} @@ -628,7 +628,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.5\linewidth]{./img/_spp.pdf} + \includegraphics[width=0.5\linewidth]{./img/_spp.jpg} \end{figure} \end{description} @@ -639,7 +639,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.5\linewidth]{./img/aspp_deeplabv2.png} + \includegraphics[width=0.5\linewidth]{./img/aspp_deeplabv2.jpg} \end{figure} \begin{remark} @@ -647,7 +647,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_dilated_conv_weights.pdf} + \includegraphics[width=0.8\linewidth]{./img/_dilated_conv_weights.jpg} \end{figure} \end{remark} @@ -658,7 +658,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.75\linewidth]{./img/_deeplabv3_aspp.pdf} + \includegraphics[width=0.75\linewidth]{./img/_deeplabv3_aspp.jpg} \caption{ ASPP with stride-16. With stride-8, rates are doubled. } diff --git a/src/year2/machine-learning-for-computer-vision/sections/_transformers.tex b/src/year2/machine-learning-for-computer-vision/sections/_transformers.tex index f65e746..e5040b3 100644 --- a/src/year2/machine-learning-for-computer-vision/sections/_transformers.tex +++ b/src/year2/machine-learning-for-computer-vision/sections/_transformers.tex @@ -9,7 +9,7 @@ Neural architecture designed for NLP sequence-to-sequence tasks. It heavily relies on the attention mechanism. \begin{figure}[H] \centering - \includegraphics[width=0.4\linewidth]{./img/transformer.png} + \includegraphics[width=0.4\linewidth]{./img/transformer.jpg} \end{figure} \item[Autoregressive generation] \marginnote{Autoregressive generation} @@ -17,7 +17,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.3\linewidth]{./img/_transformer_autoregressive.pdf} + \includegraphics[width=0.3\linewidth]{./img/_transformer_autoregressive.jpg} \caption{Example of autoregressive generation} \end{figure} \end{description} @@ -32,7 +32,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.3\linewidth]{./img/traditional_attention.png} + \includegraphics[width=0.3\linewidth]{./img/traditional_attention.jpg} \caption{Attention weights for machine translation} \end{figure} @@ -59,7 +59,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_dot_product_attention.pdf} + \includegraphics[width=0.8\linewidth]{./img/_dot_product_attention.jpg} % \caption{Steps of dot-product attention} \end{figure} @@ -84,7 +84,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_scaled_dot_attention.pdf} + \includegraphics[width=0.8\linewidth]{./img/_scaled_dot_attention.jpg} % \caption{Steps of scaled dot-product attention} \end{figure} @@ -95,7 +95,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_scaled_dot_attention_multi_q.pdf} + \includegraphics[width=0.8\linewidth]{./img/_scaled_dot_attention_multi_q.jpg} % \caption{Steps of scaled dot-product attention with multidimensional queries} \end{figure} @@ -106,7 +106,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_self_attention.pdf} + \includegraphics[width=0.8\linewidth]{./img/_self_attention.jpg} % \caption{Steps of self-attention} \end{figure} \end{description} @@ -120,7 +120,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.4\linewidth]{./img/_transformer_embeddings.pdf} + \includegraphics[width=0.4\linewidth]{./img/_transformer_embeddings.jpg} \end{figure} \end{description} @@ -139,7 +139,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.7\linewidth]{./img/_multi_head_attention.pdf} + \includegraphics[width=0.7\linewidth]{./img/_multi_head_attention.jpg} \caption{\texttt{MHSA} with two heads} \end{figure} @@ -171,7 +171,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/norm_methods.png} + \includegraphics[width=0.8\linewidth]{./img/norm_methods.jpg} \caption{Affected axis of normalization methods} \end{figure} @@ -225,12 +225,12 @@ \centering \begin{subfigure}{0.40\linewidth} \centering - \includegraphics[width=0.8\linewidth]{./img/_post_norm_encoder.pdf} + \includegraphics[width=0.8\linewidth]{./img/_post_norm_encoder.jpg} \caption{Encoder in post-norm transformer} \end{subfigure} \begin{subfigure}{0.40\linewidth} \centering - \includegraphics[width=0.8\linewidth]{./img/_pre_norm_encoder.pdf} + \includegraphics[width=0.8\linewidth]{./img/_pre_norm_encoder.jpg} \caption{Encoder in pre-norm transformer} \end{subfigure} \end{figure} @@ -268,7 +268,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.75\linewidth]{./img/_cross_attention.pdf} + \includegraphics[width=0.75\linewidth]{./img/_cross_attention.jpg} \caption{Cross-attention data flow} \end{figure} @@ -282,7 +282,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.5\linewidth]{./img/_transformer_decoder.pdf} + \includegraphics[width=0.5\linewidth]{./img/_transformer_decoder.jpg} \caption{Decoder in post-norm transformer} \end{figure} @@ -305,7 +305,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_masked_self_attention.pdf} + \includegraphics[width=0.8\linewidth]{./img/_masked_self_attention.jpg} \end{figure} \end{description} \end{description} @@ -318,7 +318,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_self_attention_permutation.pdf} + \includegraphics[width=0.8\linewidth]{./img/_self_attention_permutation.jpg} \end{figure} \end{remark} @@ -344,7 +344,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.65\linewidth]{./img/_transformer_position_encoding.pdf} + \includegraphics[width=0.65\linewidth]{./img/_transformer_position_encoding.jpg} \end{figure} \end{description} @@ -386,7 +386,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.8\linewidth]{./img/_vit_patch.pdf} + \includegraphics[width=0.8\linewidth]{./img/_vit_patch.jpg} \end{figure} \item[Vision transformer (ViT)] \marginnote{Vision transformer (ViT)} @@ -398,7 +398,7 @@ \begin{figure}[H] \centering - \includegraphics[width=0.55\linewidth]{./img/_vision_transformer.pdf} + \includegraphics[width=0.55\linewidth]{./img/_vision_transformer.jpg} \end{figure} \begin{remark} @@ -431,7 +431,7 @@ \item The first embedding projection $W_E$ for RGB images shows a similar behavior to convolutions as they tend to recognize edges and color variations. \begin{figure}[H] \centering - \includegraphics[width=0.45\linewidth]{./img/_vit_projection_rgb.pdf} + \includegraphics[width=0.45\linewidth]{./img/_vit_projection_rgb.jpg} \caption{ \parbox[t]{0.7\linewidth}{ Visualization of the columns of the patches linear projection matrix $W_E$. Each column has shape $3P^2$ and can be reshaped to be a $3 \times P \times P$ image. @@ -442,7 +442,7 @@ \item The learned positional embeddings are able to encode information about the row and column positioning of the patches. \begin{figure}[H] \centering - \includegraphics[width=0.33\linewidth]{./img/_vit_embedding_similarity.pdf} + \includegraphics[width=0.33\linewidth]{./img/_vit_embedding_similarity.jpg} \caption{ \parbox[t]{0.7\linewidth}{ Cosine similarity of the positional encoding of each patch compared to all the others @@ -453,7 +453,7 @@ \item Attention heads at the lower layers attend at both positions around the patch and far from them. Higher layers, as with convolutions, attend to distant patches. \begin{figure}[H] \centering - \includegraphics[width=0.33\linewidth]{./img/_vit_head_distance.pdf} + \includegraphics[width=0.33\linewidth]{./img/_vit_head_distance.jpg} \caption{ Mean attention distance of the heads of ViT-large/16 } @@ -462,7 +462,7 @@ \item On ImageNet top-1 accuracy, ViT outperforms a large ResNet only when pre-trained on a large dataset. \begin{figure}[H] \centering - \includegraphics[width=0.4\linewidth]{./img/_vit_results.pdf} + \includegraphics[width=0.4\linewidth]{./img/_vit_results.jpg} \caption{ \parbox[t]{0.7\linewidth}{ ImageNet top-1 accuracy with different pre-training datasets. BiT represents ResNet (two variants). diff --git a/utils/compress_pdfs.sh b/utils/compress_pdfs.sh new file mode 100644 index 0000000..2470805 --- /dev/null +++ b/utils/compress_pdfs.sh @@ -0,0 +1,12 @@ +cd $1 + +for f in *.pdf; do + echo "Rewriting $f" + name="${f%.*}" + magick \ + -density 200 \ + $name.pdf \ + -quality 50 \ + -flatten \ + $name.jpg +done \ No newline at end of file diff --git a/utils/compress_pngs.sh b/utils/compress_pngs.sh new file mode 100644 index 0000000..82afd7e --- /dev/null +++ b/utils/compress_pngs.sh @@ -0,0 +1,11 @@ +cd $1 + +for f in *.png; do + echo "Rewriting $f" + name="${f%.*}" + magick \ + $name.png \ + -quality 50 \ + -flatten \ + $name.jpg +done \ No newline at end of file