import {css} from '@emotion/react'
import {useState} from 'react'
import {IconDownload} from '@kensho/icons'

import Button from '../../components/Button'
import Page from '../../components/page/Page'
import Section from '../../components/Section'
import WikidataLogo from '../../assets/logos/Wikidata-logo.png'
import WikipediaLogo from '../../assets/logos/Wikipedia-logo.png'
import KenshoWikiChart from '../../assets/logos/kensho-wiki-chart.png'

import InfoDialog from './InfoDialog'

// Outer content wrapper: caps the width at 800px and centers the column
// horizontally via auto left/right margins.
const containerCss = css`
  max-width: 800px;
  margin: 0 auto;
`

// Flex container that horizontally centers its children, with a 20px gap below.
// Any nested <img> is rendered 100px tall with 20px of horizontal margin on
// each side (applied below to the Wikidata / Wikipedia logo pair).
const imageContainerCss = css`
  display: flex;
  justify-content: center;
  margin-bottom: 20px;

  img {
    height: 100px;
    margin: 0 20px;
  }
`

// Centered flex container with a 20px gap below. A nested <img> fills the
// available width but never grows beyond 500px (applied below to the Kensho
// wiki chart image).
const kenshoImageCss = css`
  display: flex;
  justify-content: center;
  margin-bottom: 20px;

  img {
    width: 100%;
    max-width: 500px;
  }
`

// Centers the download button horizontally, with 100px of space above it and
// 200px below it.
const buttonContainerCss = css`
  display: flex;
  justify-content: center;
  margin: 100px 0 200px;
`

// Spacing for the icon inside the download button: 12px gap to its right.
// The icon is hidden entirely when the viewport is 720px wide or narrower.
const iconCss = css`
  margin-right: 12px;
  @media (max-width: 720px) {
    display: none;
  }
`

/**
 * Landing page describing the Kensho derived Wikimedia dataset.
 *
 * Renders static informational sections (background on Wikimedia, the Kaggle
 * dataset, open source tooling, contact details, and license) inside a `Page`.
 * An `InfoDialog` is kept mounted and toggled through local state; it is opened
 * from either the page's navbar action or the "Download the data" button, and
 * closed through the dialog's own `closeDialog` callback.
 *
 * @returns The rendered page element.
 */
export default function WikimediaPage(): JSX.Element {
  // Whether the InfoDialog is currently shown; starts closed.
  const [isDialogShown, setIsDialogShown] = useState(false)

  // Both entry points (navbar action and download button) open the same dialog.
  const openDialog = (): void => setIsDialogShown(true)
  const closeDialog = (): void => setIsDialogShown(false)

  return (
    <Page
      title="Kensho Wikimedia Data for Natural Language Processing (NLP)"
      stickyNavbar
      navbarAction={() => openDialog()}
    >
      <div css={containerCss}>
        <Section title="Wikimedia">
          <p>
            <a href="https://www.wikipedia.org/">Wikipedia</a>, the free encyclopedia, and{' '}
            <a href="https://www.wikidata.org/wiki/Wikidata:Main_Page">Wikidata</a>, the free
            knowledge base, are crowd-sourced projects supported by the{' '}
            <a href="https://wikimediafoundation.org/">Wikimedia Foundation</a>. These projects
            contribute to the{' '}
            <a href="https://wikimediafoundation.org/about/mission/">
              Wikimedia Foundation&apos;s mission
            </a>{' '}
            of empowering people to develop and disseminate educational content under a free
            license.
          </p>
          <p>
            They are also{' '}
            <a href="https://arxiv.org/search/advanced?advanced=&terms-0-operator=AND&terms-0-term=wikipedia&terms-0-field=abstract&terms-1-operator=OR&terms-1-term=wikidata&terms-1-field=abstract&classification-computer_science=y&classification-physics_archives=all&classification-include_cross_list=include&date-filter_by=all_dates&date-year=&date-from_date=&date-to_date=&date-date_type=submitted_date&abstracts=show&size=50&order=-announced_date_first">
              heavily utilized by computer science research groups
            </a>
            , especially those interested in natural language processing (NLP). The Wikimedia
            Foundation periodically releases snapshots of the raw data backing these projects, but
            these are in a variety of formats and were not designed for use in NLP research.
          </p>
        </Section>
        <div css={imageContainerCss}>
          <img src={WikidataLogo} alt="Wikidata Logo" />
          <img src={WikipediaLogo} alt="Wikipedia Logo" />
        </div>
        <Section title="Kensho Derived Wikimedia Dataset on Kaggle">
          <p>
            In the <a href="https://blog.kensho.com/research/home">Kensho R&D group</a>, we spend a
            lot of time downloading, parsing, and experimenting with raw Wikimedia data. We host a
            static subset of our derived Wikimedia dataset on Kaggle so that researchers can explore
            and prototype ideas.
          </p>
          <p>
            The{' '}
            <a href="https://www.kaggle.com/kenshoresearch/kensho-derived-wikimedia-data">
              Kaggle dataset
            </a>{' '}
            consists of three main components - the plain text of English Wikipedia articles,
            annotations that describe which text spans are links, and a compact sample of the
            Wikidata knowledge base. It was built using the English Wikipedia snapshot from 2019
            December 1 and the Wikidata snapshot from 2019 December 2. You can find a more detailed
            description and example notebooks on the Kaggle dataset page.
          </p>
        </Section>
        <div css={kenshoImageCss}>
          <img src={KenshoWikiChart} alt="Kensho Wikidata Graph" />
        </div>
        <Section title="Open Source Tools">
          <p>
            Our Wikimedia processing pipeline uses a mixture of our own open source packages and
            some open source packages we have contributed to:
          </p>
          <ul>
            <li>
              <a href="https://github.com/kensho-technologies/kwnlp-sql-parser">
                https://github.com/kensho-technologies/kwnlp-sql-parser
              </a>
            </li>
            <li>
              <a href="https://github.com/kensho-technologies/kwnlp-dump-downloader">
                https://github.com/kensho-technologies/kwnlp-dump-downloader
              </a>
            </li>
            <li>
              <a href="https://github.com/kensho-technologies/qwikidata">
                https://github.com/kensho-technologies/qwikidata
              </a>
            </li>
            <li>
              <a href="https://github.com/mediawiki-utilities/python-mwtext">
                https://github.com/mediawiki-utilities/python-mwtext
              </a>
            </li>
          </ul>
        </Section>
        <Section title="Contact Us">
          Please reach out to <a href="mailto:data@kensho.com">data@kensho.com</a> with any
          questions or concerns.
        </Section>
        <Section title="Terms & Conditions">
          The Kensho derived Wikimedia Datasets have a{' '}
          <a href="https://creativecommons.org/licenses/by-sa/3.0/">CC BY-SA 3.0</a> license.
        </Section>
        <div css={buttonContainerCss}>
          <Button onClick={() => openDialog()}>
            <IconDownload size={32} css={iconCss} />
            Download the data
          </Button>
        </div>
      </div>
      <InfoDialog isOpen={isDialogShown} closeDialog={() => closeDialog()} />
    </Page>
  )
}
