package com.aduech.www.datascience;

import android.os.Build;
import android.os.Bundle;
import android.text.Html;
import android.text.method.LinkMovementMethod;
import android.widget.TextView;
import androidx.appcompat.app.AppCompatActivity;

/* loaded from: classes.dex */
public class ViewDatasetActivity extends AppCompatActivity {
    // Index of the dataset page to display. onCreate shows t0, t1 or t2 for the
    // values 0, 1 or 2 respectively and renders nothing for any other value.
    // Set to 1 when the activity is launched without intent extras.
    int ID;
    // Title of the screen; handed to setTitle() in onCreate. Set to "" when the
    // activity is launched without intent extras.
    String name;
    String t0 = "<h2><a href=\"http://yann.lecun.com/exdb/mnist/\" target=\"_blank\" rel=\"noopener\">MNIST</a></h2>\n<p>MNIST is one of the most popular deep learning datasets out there. It&#8217;s a dataset of handwritten digits and contains a training set of 60,000 examples and a test set of 10,000 examples. It&#8217;s a good database for trying learning techniques and deep recognition patterns on real-world data while spending minimum time and effort in data preprocessing.</p>\n<p><strong>Size: </strong>~50 MB</p>\n<p><strong>Number of Records:</strong> 70,000 images in 10 classes</p>\n<p><strong>SOTA</strong>: <a href=\"https://arxiv.org/pdf/1710.09829.pdf\" target=\"_blank\" rel=\"noopener\">Dynamic Routing Between Capsules </a></p>\n<p>&nbsp;</p>\n<h2><a href=\"http://cocodataset.org/#home\" target=\"_blank\" rel=\"noopener\">MS-COCO</a></h2>\n<p>COCO is a large-scale and rich for object detection, segmentation and captioning dataset. It has several features:</p>\n<ul>\n<li class=\"checkBullet\">Object segmentation</li>\n<li class=\"checkBullet\">Recognition in context</li>\n<li class=\"checkBullet\">Superpixel stuff segmentation</li>\n<li class=\"checkBullet\">330K images (&gt;200K labeled)</li>\n<li class=\"checkBullet\">1.5 million object instances</li>\n<li class=\"checkBullet\">80 object categories</li>\n<li class=\"checkBullet\">91 stuff categories</li>\n<li class=\"checkBullet\">5 captions per image</li>\n<li class=\"checkBullet cye-lm-tag\">250,000 people with keypoints</li>\n</ul>\n<p><strong>Size:</strong> ~25 GB (Compressed)</p>\n<p><strong>Number of Records:</strong> <span style=\"font-weight: 400;\">330K images, 80 object categories, 5 captions per image, 250,000 people with key points</span></p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/pdf/1703.06870.pdf\" target=\"_blank\" rel=\"noopener\">Mask R-CNN</a></p>\n<p>&nbsp;</p><hr />\n<h2><a href=\"http://www.image-net.org\" target=\"_blank\" 
rel=\"noopener\">ImageNet</a></h2>\n<p>ImageNet is a dataset of images that are organized according to the <a href=\"https://wordnet.princeton.edu\" target=\"_blank\" rel=\"noopener\">WordNet</a> hierarchy. WordNet contains approximately 100,000 phrases and ImageNet has provided around 1000 images on average to illustrate each phrase.</p>\n<p><strong>Size:</strong> ~150GB</p>\n<p><strong>Number of Records:</strong> <span style=\"font-weight: 400;\">Total number of images: ~1,500,000; each with multiple bounding boxes and respective class labels</span></p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/pdf/1611.05431.pdf\" target=\"_blank\" rel=\"noopener\">Aggregated Residual Transformations for Deep Neural Networks </a></p>\n<p>&nbsp;</p>\n<h2><a href=\"https://github.com/openimages/dataset\" target=\"_blank\" rel=\"noopener\">Open Images Dataset</a></h2>\n<p>Open Images is a dataset of almost 9 million URLs for images. These images have been annotated with image-level labels bounding boxes spanning thousands of classes. 
The dataset contains a training set of 9,011,219 images, a validation set of 41,260 images and a test set of 125,436 images.</p>\n<p><strong>Size:</strong> 500 GB (Compressed)</p>\n<p><strong>Number of Records:</strong> 9,011,219 images with more than 5k labels</p>\n<p><strong>SOTA</strong> : Resnet 101 image classification model (trained on V2 data): <a href=\"https://storage.googleapis.com/openimages/2017_07/oidv2-resnet_v1_101.ckpt.tar.gz\" target=\"_blank\" rel=\"noopener\">Model checkpoint</a>, <a href=\"https://storage.googleapis.com/openimages/2017_07/oidv2-resnet_v1_101.readme.txt\" target=\"_blank\" rel=\"noopener\">Checkpoint readme</a>, <a href=\"https://github.com/openimages/dataset/blob/master/tools/classify_oidv2.py\" target=\"_blank\" rel=\"noopener\">Inference code</a>.</p>\n<p>&nbsp;</p>\n<h2><a href=\"http://www.visualqa.org\" target=\"_blank\" rel=\"noopener\">VisualQA</a></h2>\n<p>VQA is a dataset containing open-ended questions about images. These questions require an understanding of vision and language. 
Some of the interesting features of this dataset are:</p>\n<ul>\n<li>265,016 images (COCO and abstract scenes)</li>\n<li>At least 3 questions (5.4 questions on average) per image</li>\n<li>10 ground truth answers per question</li>\n<li>3 plausible (but likely incorrect) answers per question</li>\n<li class=\"cye-lm-tag\">Automatic evaluation metric</li>\n</ul>\n<p><strong>Size:</strong> 25 GB (Compressed)</p>\n<p><strong>Number of Records:</strong> <span style=\"font-weight: 400;\">265,016 images, at least 3 questions per image, 10 ground truth answers per question</span></p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/abs/1708.02711\" target=\"_blank\" rel=\"noopener\">Tips and Tricks for Visual Question Answering: Learnings from the 2017 Challenge</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"http://ufldl.stanford.edu/housenumbers/\" target=\"_blank\" rel=\"noopener\">The Street View House Numbers (SVHN)</a></h2>\n<p>This is a real-world image dataset for developing object detection algorithms. This requires minimum data preprocessing. It is similar to the MNIST dataset mentioned in this list, but has more labelled data (over 600,000 images). The data has been collected from house numbers viewed in Google Street View.</p>\n<p><strong>Size:</strong> 2.5 GB</p>\n<p><strong>Number of Records:</strong> 6,30,420 images in 10 classes</p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/pdf/1507.00677.pdf\" target=\"_blank\" rel=\"noopener\">Distributional Smoothing With Virtual Adversarial Training</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"http://www.cs.toronto.edu/~kriz/cifar.html\" target=\"_blank\" rel=\"noopener\">CIFAR-10</a></h2>\n<p>This dataset is another one for image classification. It consists of 60,000 images of 10 classes (each class is represented as a row in the above image). In total, there are 50,000 training images and 10,000 test images. The dataset is divided into 6 parts &#8211; 5 training batches and 1 test batch. 
Each batch has 10,000 images.</p>\n<p><strong>Size:</strong> 170 MB</p>\n<p><strong>Number of Records:</strong> 60,000 images in 10 classes</p>\n<p><strong>SOTA</strong> : <a href=\"https://openreview.net/pdf?id=S1NHaMW0b\" target=\"_blank\" rel=\"noopener\">ShakeDrop regularization </a></p>\n<p>&nbsp;</p>\n<h2><a href=\"https://github.com/zalandoresearch/fashion-mnist\" target=\"_blank\" rel=\"noopener\">Fashion-MNIST</a></h2>\n<p>Fashion-MNIST consists of 60,000 training images and 10,000 test images. It is a MNIST-like fashion product database. The developers believe MNIST has been overused so they created this as a direct replacement for that dataset. Each image is in greyscale and associated with a label from 10 classes.</p>\n<p><strong>Size:</strong> 30 MB</p>\n<p><strong>Number of Records:</strong> 70,000 images in 10 classes</p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/abs/1708.04896\" target=\"_blank\" rel=\"noopener\">Random Erasing Data Augmentation </a></p>";
    String t1 = "<h2><a href=\"http://ai.stanford.edu/~amaas/data/sentiment/\" target=\"_blank\" rel=\"noopener\">IMDB Reviews</a></h2>\n<p>This is a dream dataset for movie lovers. It is meant for binary sentiment classification and has far more data than any previous datasets in this field. Apart from the training and test review examples, there is further unlabeled data for use as well. Raw text and preprocessed bag of words formats have also been included.</p>\n<p><strong>Size:</strong> 80 MB</p>\n<p><strong>Number of Records:</strong> 25,000 highly polar movie reviews for training, and 25,000 for testing</p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/abs/1705.09207\" target=\"_blank\" rel=\"noopener\">Learning Structured Text Representations </a></p>\n<p>&nbsp;</p>\n<h2><a href=\"https://archive.ics.uci.edu/ml/datasets/Twenty+Newsgroups\" target=\"_blank\" rel=\"noopener\">Twenty Newsgroups</a></h2>\n<p>This dataset, as the name suggests, contains information about newsgroups. To curate this dataset, 1000 Usenet articles were taken from 20 different newsgroups. The articles have typical features like subject lines, signatures, and quotes.</p>\n<p><strong>Size:</strong> 20 MB</p>\n<p><strong>Number of Records:</strong> 20,000 messages taken from 20 newsgroups</p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/abs/1606.01781\" target=\"_blank\" rel=\"noopener\">Very Deep Convolutional Networks for Text Classification</a>,</p>\n<p>&nbsp;</p>\n<h2><a href=\"http://help.sentiment140.com/for-students/\" target=\"_blank\" rel=\"noopener\">Sentiment140</a></h2>\n<p>Sentiment140 is a dataset that can be used for sentiment analysis. A popular dataset, it is perfect to start off your NLP journey. Emotions have been pre-removed from the data. 
The final dataset has the below 6 features:</p>\n<ul>\n<li>polarity of the tweet</li>\n<li>id of the tweet</li>\n<li>date of the tweet</li>\n<li>the query</li>\n<li>username of the tweeter</li>\n<li>text of the tweet</li>\n</ul>\n<p><strong>Size:</strong> 80 MB (Compressed)</p>\n<p><strong>Number of Records:</strong> 1,60,000 tweets</p>\n<p><strong>SOTA</strong> : <a href=\"http://www.aclweb.org/anthology/W17-5202\" target=\"_blank\" rel=\"noopener\">Assessing State-of-the-Art Sentiment Models on State-of-the-Art Sentiment Datasets</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"https://wordnet.princeton.edu/\" target=\"_blank\" rel=\"noopener\">WordNet</a></h2>\n<p>Mentioned in the ImageNet dataset above, WordNet is a large database of English synsets. Synsets are groups of synonyms that each describe a different concept. WordNet&#8217;s structure makes it a very useful tool for NLP.</p>\n<p><strong>Size:</strong> 10 MB</p>\n<p><strong>Number of Records:</strong> <span style=\"font-weight: 400;\">117,000 synsets is linked to other synsets by means of a small number of “conceptual relations.</span></p>\n<p><strong>SOTA</strong> : <a href=\"https://aclanthology.info/pdf/R/R11/R11-1097.pdf\" target=\"_blank\" rel=\"noopener\">Wordnets: State of the Art and Perspectives</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"https://www.yelp.com/dataset\" target=\"_blank\" rel=\"noopener\">Yelp Reviews</a></h2>\n<p>This is an open dataset released by Yelp for learning purposes. It consists of millions of user reviews, businesses attributes and over 200,000 pictures from multiple metropolitan areas. 
This is a very commonly used dataset for NLP challenges globally.</p>\n<p><strong>Size:</strong> 2.66 GB JSON, 2.9 GB SQL and 7.5 GB Photos (all compressed)</p>\n<p><strong>Number of Records:</strong> 5,200,000 reviews, 174,000 business attributes, 200,000 pictures and 11 metropolitan areas</p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/pdf/1710.00519.pdf\" target=\"_blank\" rel=\"noopener\">Attentive Convolution</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"http://nlp.cs.nyu.edu/wikipedia-data/\" target=\"_blank\" rel=\"noopener\">The Wikipedia Corpus</a></h2>\n<p>This dataset is a collection of a the full text on Wikipedia. It contains almost 1.9 billion words from more than 4 million articles. What makes this a powerful NLP dataset is that you search by word, phrase or part of a paragraph itself.</p>\n<p><strong>Size:</strong> 20 MB</p>\n<p><strong>Number of Records:</strong> 4,400,000 articles containing 1.9 billion words</p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/pdf/1711.03953.pdf\" target=\"_blank\" rel=\"noopener\">Breaking The Softmax Bottelneck: A High-Rank RNN language Model</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"http://u.cs.biu.ac.il/~koppel/BlogCorpus.htm\" target=\"_blank\" rel=\"noopener\">The Blog Authorship Corpus</a></h2>\n<p>This dataset consists of blog posts collected from thousands of bloggers and has been gathered from blogger.com. Each blog is provided as a separate file. 
Each blog contains a minimum of 200 occurrences of commonly used English words.</p>\n<p><strong>Size:</strong> 300 MB</p>\n<p><strong>Number of Records:</strong> 681,288 posts with over 140 million words</p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/pdf/1609.06686.pdf\" target=\"_blank\" rel=\"noopener\">Character-level and Multi-channel Convolutional Neural Networks for Large-scale Authorship Attribution</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"http://statmt.org/wmt18/index.html\" target=\"_blank\" rel=\"noopener\">Machine Translation of Various Languages</a></h2>\n<p>This dataset consists of training data for four European languages. The task here is to improve the current translation methods. You can participate in any of the following language pairs:</p>\n<ul class=\"cye-lm-tag\">\n<li>English-Chinese and Chinese-English</li>\n<li>English-Czech and Czech-English</li>\n<li>English-Estonian and Estonian-English</li>\n<li>English-Finnish and Finnish-English</li>\n<li>English-German and German-English</li>\n<li>English-Kazakh and Kazakh-English</li>\n<li>English-Russian and Russian-English</li>\n<li>English-Turkish and Turkish-English</li>\n</ul>\n<p><strong>Size:</strong> ~15 GB</p>\n<p><strong>Number of Records:</strong> ~30,000,000 sentences and their translations</p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/abs/1706.03762\" target=\"_blank\" rel=\"noopener\">Attention Is All You Need</a></p>\n<p>&nbsp;";
    String t2 = "<h2><a href=\"https://github.com/Jakobovski/free-spoken-digit-dataset\">Free Spoken Digit Dataset</a></h2>\n<p>Another entry in this list for inspired by the MNIST dataset! This one was created to solve the task of identifying spoken digits in audio samples. It&#8217;s an open dataset so the hope is that it will keep growing as people keep contributing more samples. Currently, it contains the below characteristics:</p>\n<ul class=\"cye-lm-tag\">\n<li>3 speakers</li>\n<li>1,500 recordings (50 of each digit per speaker)</li>\n<li class=\"cye-lm-tag\">English pronunciations</li>\n</ul>\n<p><strong>Size:</strong> 10 MB</p>\n<p><strong>Number of Records: </strong>1,500 audio samples</p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/pdf/1712.00866\" target=\"_blank\" rel=\"noopener\">Raw Waveform-based Audio Classification Using Sample-level CNN Architectures</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"https://github.com/mdeff/fma\" target=\"_blank\" rel=\"noopener\">Free Music Archive (FMA)</a></h2>\n<p>FMA is a dataset for music analysis. The dataset consists of full-length and HQ audio, pre-computed features, and track and user-level metadata. It an an open dataset created for evaluating several tasks in MIR. 
Below is the list of csv files the dataset has along with what they include:</p>\n<ul class=\"cye-lm-tag\">\n<li><code>tracks.csv</code>: per track metadata such as ID, title, artist, genres, tags and play counts, for all 106,574 tracks.</li>\n<li><code>genres.csv</code>: all 163 genre IDs with their name and parent (used to infer the genre hierarchy and top-level genres).</li>\n<li><code>features.csv</code>: common features extracted with <a href=\"https://librosa.github.io/librosa/\" rel=\"nofollow\">librosa</a>.</li>\n<li><code>echonest.csv</code>: audio features provided by <a href=\"http://the.echonest.com/\" rel=\"nofollow\">Echonest</a> (now <a href=\"https://www.spotify.com/\" rel=\"nofollow\">Spotify</a>) for a subset of 13,129 tracks.</li>\n</ul>\n<p><strong>Size: </strong>~1000 GB</p>\n<p><strong>Number of Records:</strong> ~100,000 tracks</p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/pdf/1803.05337.pdf\" target=\"_blank\" rel=\"noopener\">Learning to Recognize Musical Genre from Audio</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"http://mtg.upf.edu/ismir2004/contest/tempoContest/node5.html\" target=\"_blank\" rel=\"nofollow noopener\">Ballroom</a></h2>\n<p>This dataset contains ballroom dancing audio files. <span style=\"font-weight: 400;\">A few characteristic excerpts of many dance styles are provided in real audio format. 
</span>Below are a few characteristics of the dataset:</p>\n<ul class=\"cye-lm-tag\">\n<li>Total number of instances: 698</li>\n<li>Duration: ~30 s</li>\n<li>Total duration: ~20940 s</li>\n</ul>\n<p><strong>Size:</strong> 14GB (Compressed)</p>\n<p><strong>Number of Records:</strong> ~700 audio samples</p>\n<p><strong>SOTA</strong> : <a href=\"https://pdfs.semanticscholar.org/0cc2/952bf70c84e0199fcf8e58a8680a7903521e.pdf\" target=\"_blank\" rel=\"noopener\">A Multi-Model Approach To Beat Tracking Considering Heterogeneous Music Styles</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"https://labrosa.ee.columbia.edu/millionsong/\" target=\"_blank\" rel=\"noopener\">Million Song Dataset</a></h2>\n<p><span style=\"font-weight: 400;\">The </span><b>Million Song Dataset</b><span style=\"font-weight: 400;\"> is a freely-available collection of audio features and metadata for a million contemporary popular music tracks. </span><span style=\"font-weight: 400;\">Its purposes are:</span></p>\n<ul>\n<li style=\"font-weight: 400;\"><span style=\"font-weight: 400;\">To encourage research on algorithms that scale to commercial sizes</span></li>\n<li style=\"font-weight: 400;\"><span style=\"font-weight: 400;\">To provide a reference dataset for evaluating research</span></li>\n<li style=\"font-weight: 400;\"><span style=\"font-weight: 400;\">As a shortcut alternative to creating a large dataset with APIs (e.g. The Echo Nest&#8217;s)</span></li>\n<li style=\"font-weight: 400;\"><span style=\"font-weight: 400;\">To help new researchers get started in the MIR field</span></li>\n</ul>\n<p><span style=\"font-weight: 400;\">The core of the dataset is the feature analysis and metadata for one million songs</span><span style=\"font-weight: 400;\">. The dataset does not include any audio, only the derived features. 
The sample audio can be fetched from services like</span><a href=\"http://www.7digital.com\"> <span style=\"font-weight: 400;\">7digital</span></a><span style=\"font-weight: 400;\">, using </span><a href=\"https://github.com/tb2332/MSongsDB/tree/master/Tasks_Demos/Preview7digital\"><span style=\"font-weight: 400;\">code</span></a><span style=\"font-weight: 400;\"> provided by Columbia University.</span></p>\n<p><strong>Size: </strong>280 GB</p>\n<p><strong>Number of Records:</strong> PS &#8211; its a million songs!</p>\n<p><strong>SOTA</strong> :  <a href=\"http://www.ke.tu-darmstadt.de/events/PL-12/papers/08-aiolli.pdf\" target=\"_blank\" rel=\"noopener\">Preliminary Study on a Recommender System for the Million Songs Dataset Challenge</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"http://www.openslr.org/12/\" target=\"_blank\" rel=\"noopener\">LibriSpeech</a></h2>\n<p>This dataset is a large-scale corpus of around 1000 hours of English speech. The data has been sourced from audiobooks from the LibriVox project. It has been segmented and aligned properly. If you&#8217;re looking for a starting point, check out already prepared Acoustic models that are trained on this data set at <a href=\"http://www.kaldi-asr.org/downloads/build/6/trunk/egs/\" target=\"_blank\" rel=\"noopener\">kaldi-asr.org</a> and language models, suitable for evaluation, at <a href=\"http://www.openslr.org/11/\" target=\"_blank\" rel=\"noopener\">http://www.openslr.org/11/</a>.</p>\n<p><strong>Size:</strong> ~60 GB</p>\n<p><strong>Number of Records: </strong>1000 hours of speech</p>\n<p><strong>SOTA</strong> : <a href=\"https://arxiv.org/abs/1712.09444\" target=\"_blank\" rel=\"noopener\">Letter-Based Speech Recognition with Gated ConvNets</a></p>\n<p>&nbsp;</p>\n<h2><a href=\"http://www.robots.ox.ac.uk/~vgg/data/voxceleb/\" target=\"_blank\" rel=\"noopener\">VoxCeleb</a></h2>\n<p>VoxCeleb is a large-scale speaker identification dataset. 
It contains around 100,000 utterances by 1,251 celebrities, extracted from YouTube videos. The data is mostly gender balanced (males comprise of 55%). The celebrities span a diverse range of accents, professions and age. There is no overlap between the development and test sets. It&#8217;s an intriguing use case for isolating and identifying which superstar the voice belongs to.</p>\n<p><strong>Size:</strong> 150 MB</p>\n<p><strong>Number of Records:</strong> 100,000 utterances by 1,251 celebrities</p>\n<p><strong>SOTA</strong> : <a href=\"https://www.robots.ox.ac.uk/~vgg/publications/2017/Nagrani17/nagrani17.pdf\" target=\"_blank\" rel=\"noopener\">VoxCeleb: a large-scale speaker identification dataset</a></p>";

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Resolves which dataset page to show and renders its HTML into the text view.
     *
     * <p>The page index ({@code "ID"}) and title ({@code "name"}) are resolved in this order:
     * <ol>
     *   <li>the saved instance state, but only when it actually carries an {@code "ID"} entry;</li>
     *   <li>the extras of the launching intent;</li>
     *   <li>the defaults: index 1 and an empty title.</li>
     * </ol>
     * The saved state is checked for the key first because this class does not write these
     * values into it; blindly unboxing a missing entry crashed the activity on recreation
     * (for example after a rotation).
     *
     * @param bundle saved instance state, or {@code null} on a fresh launch
     */
    @Override
    public void onCreate(Bundle bundle) {
        super.onCreate(bundle);
        setContentView(R.layout.activity_view_dataset);

        Bundle extras = getIntent().getExtras();
        if (bundle != null && bundle.containsKey("ID")) {
            this.ID = bundle.getInt("ID");
            this.name = bundle.getString("name");
        } else if (extras != null) {
            // The launching intent is retained across recreation, so this also covers
            // the case of a non-null saved state that does not hold our keys.
            this.ID = extras.getInt("ID");
            this.name = extras.getString("name");
        } else {
            this.ID = 1;
            this.name = "";
        }
        setTitle(this.name);

        TextView textView = (TextView) findViewById(R.id.tv);
        // Makes the <a href> links inside the rendered HTML clickable.
        textView.setMovementMethod(LinkMovementMethod.getInstance());

        // Pick the HTML page for the requested index.
        final String html;
        switch (this.ID) {
            case 0:
                html = this.t0;
                break;
            case 1:
                html = this.t1;
                break;
            case 2:
                html = this.t2;
                break;
            default:
                // Unknown index: leave the text view untouched.
                return;
        }

        if (Build.VERSION.SDK_INT >= 24) {
            textView.setText(Html.fromHtml(html, Html.FROM_HTML_MODE_LEGACY));
        } else {
            // The single-argument overload is the only one available before API 24.
            textView.setText(Html.fromHtml(html));
        }
    }
}
