

import React, {useState} from 'react';
import styled, {css} from 'styled-components';

import Dialog from '@material-ui/core/Dialog';
import DialogContent from '@material-ui/core/DialogContent';
import Button from '@material-ui/core/Button';
import Collapse from '@material-ui/core/Collapse';
import blue from '@material-ui/core/colors/blue';

import {FontAwesomeIcon} from '@fortawesome/react-fontawesome';

import {faArrowAltCircleRight, faCog, faAngleRight, faAngleDown} from
  '@fortawesome/free-solid-svg-icons';

import * as config from '../config';
import {useObservable} from '../utils';
import store from '../store';
import {device, Link, Code} from './styles';


// Material-UI DialogContent with 2em of padding. The rule is wrapped in `&&`
// (the component's own selector, doubled) — presumably to out-rank
// Material-UI's default DialogContent padding; confirm against rendered CSS.
const StyledDialogContent = styled(DialogContent)`&& {
  padding: 2em;
}`;

// Shared CSS fragment: inside the `device.tablet` media query, sets the font
// size to 16px and the line height to 1.3. Interpolated into CopyBig,
// CopyMedium and CopyRegular.
const tabletCopy = css`
  @media ${device.tablet} {
    font-size: 16px;
    line-height: 1.3;
  }
`;

// Largest copy style: a div in Roboto at 34px / weight 400 with a 0.5em
// bottom margin, followed by the shared `tabletCopy` media-query rules.
// Also serves as the base component for CopyMedium and CopyRegular.
const CopyBig = styled.div`
  font-family: Roboto;
  font-size: 34px;
  font-weight: 400;
  margin-bottom: 0.5em;
  ${tabletCopy}
`;

// Medium copy style: extends CopyBig with 18px text, a 1.5 line height and a
// 1.5em bottom margin. `tabletCopy` is interpolated again after these
// overrides — presumably so the tablet font-size/line-height still come last
// and are not overridden by the values set here (TODO confirm in output CSS).
const CopyMedium = styled(CopyBig)`
  font-size: 18px;
  line-height: 1.5;
  margin-bottom: 1.5em;
  ${tabletCopy}
`;

// Regular body copy: extends CopyBig with a 1em top margin, 16px text and a
// 1.5 line height. `tabletCopy` is interpolated again after these overrides —
// presumably so the tablet rules still come last (TODO confirm in output CSS).
const CopyRegular = styled(CopyBig)`
  margin-top: 1em;
  font-size: 16px;
  line-height: 1.5;
  ${tabletCopy}
`;

// Material-UI Button with a 1.5em font size. The rule is wrapped in `&&` —
// presumably to out-rank Material-UI's default button font size; confirm.
const ExploreButton = styled(Button)`&& {
  font-size: 1.5em;
}`;

// FontAwesomeIcon with a 0.5em left margin, separating it from the label
// text that precedes it inside the explore button.
const ExploreIcon = styled(FontAwesomeIcon)`
  margin-left: 0.5em;
`;

// Inline span for highlighted figures: bold text in Material-UI blue[800].
const Metric = styled.span`
  font-weight: bold;
  color: ${blue[800]};
`;

// Wrapper div for the "Technical details" section; adds a 1.5em top margin.
const Details = styled.div`
  margin-top: 1.5em;
`;

// Anchor used as the clickable "Technical details" toggle: 14px Roboto Mono
// with a pointer cursor; the text turns Material-UI blue[800] on hover.
const DetailsToggle = styled.a`
  font-family: 'Roboto Mono';
  font-size: 14px;
  cursor: pointer;
  &:hover {
    color: ${blue[800]};
  }
`;

// Author credit paragraph: 12px Roboto Mono with a 1em top margin and no
// right, bottom or left margin.
const Byline = styled.p`
  font-family: 'Roboto Mono';
  font-size: 12px;
  margin: 1em 0 0 0;
`;


const DetailsContent = () => <CopyRegular>
  <p><Link href={config.OSP_HOME_URL}>Open Syllabus</Link> collects and analyzes one of the largest databases of college course syllabi in the world &mdash; as of spring semester 2019, the archive holds 6.9M syllabi from 2,521 colleges and universities in 122 countries, with best coverage in the US, UK, Canada, and Australia. One of the core pieces of metadata extracted from the documents is information about which books and articles are assigned in each course. By analyzing this across millions of classes, we can start to get a bird's-eye view of the relationships among books, articles, and disciplines that emerges from the collective process of teaching and learning encoded by the syllabi.</p>

  <p>To get the text assignment data, we start with a database of 70M books and articles, and then identify a set of "assignment strings" in the syllabi, individual cases where one of these books or articles is being assigned in a class. This is two-step process. We first surface a set of high-recall candidate matches, identified by the presence of key tokens from the title and author of a known text &mdash; for example, all places where <Code>homer</Code> and <Code>iliad</Code> show up within ~10 tokens. Then, to scrub out false-positives, these candidates are validated using neural classifiers over tokens and characters in the contexts around the keyword matches.</p>

  <p>This results in a set of 25M validated text assignments. We can then treat each syllabus as grouping mechanism that implies a relationship between the set of texts assigned in the class, which in turn allows us to construct a "co-assignment" graph &mdash; whenever two books appear in the same course, we add (or increment) an edge between them. For this visualization, we skimmed off the set of texts with at least 20 assignments, which results in a graph with 160k nodes and 20M edges.</p>

  <p>We then used <Link href="https://snap.stanford.edu/node2vec/">node2vec</Link> to produce a 128-dimension embedding for each text. Finally, we PCA these embeddings down to 50 dimensions (which, heuristically, seems to produce more structured field clusters), and then use <Link href="https://arxiv.org/abs/1802.03426">UMAP</Link> to project down to 2 dimensions.</p>

  <p>To show all 180k texts interactively in the browser, we split the data into a "foreground" set of the 30k most-frequent texts, which are loaded in bulk onto the client and rendered via WebGL using <Link href="https://www.pixijs.com/">PixiJS</Link>; and then the remaining "background" set of 150k is queried as-needed from the server when the viewport zooms down to higher levels of magnification, using Elasticsearch geo queries. Once points are loaded into the client, <Link href="https://github.com/mourner/rbush">RBush</Link> and <Link href="https://github.com/mourner/rbush-knn">RBush-KNN</Link> are used to provide fast hover interactions over the set of visible points.</p>

  <p>Inspired by:</p>

  <ul>
    <li>Matt Miller's <Link href="https://www.nypl.org/blog/2014/07/31/networked-catalog">visualization of subject headings in the NYPL catalog</Link>.</li>
    <li><Link href="http://dhlab.yale.edu/projects/pixplot/">PixPlot</Link>, by Doug Duhaime at the Yale DH lab.</li>
    <li>The <Link href="http://projector.tensorflow.org/">Tensorboard Embedding Projector</Link>, from Google.</li>
    <li>Leland McInnes - UMAP, and his <Link href="https://twitter.com/leland_mcinnes">Twitter feed</Link>.</li>
  </ul>

  <Byline>by <Link href="https://twitter.com/clured">@clured</Link> + <Link href="https://twitter.com/opensyllabus">@opensyllabus</Link></Byline>
</CopyRegular>


const Info = () => {

  const visible = useObservable(store.INFO_VISIBLE, false);
  const plotReady = Boolean(useObservable(store.PLOT_EXTENT));

  const [detailsOpen, setDetailsOpen] = useState(false);

  const toggleDetailsOpen = () => {
    setDetailsOpen(!detailsOpen);
  }

  const close = () => {
    if (plotReady) store.hideInfo();
  }

  const exploreIcon = plotReady ?
    <ExploreIcon icon={faArrowAltCircleRight} /> :
    <ExploreIcon icon={faCog} spin />

  const detailsIcon = <FontAwesomeIcon
    icon={detailsOpen ? faAngleDown : faAngleRight} />

  return (
    <Dialog
      open={visible}
      onBackdropClick={close}
      onEscapeKeyDown={close}>
      <StyledDialogContent>

        <CopyBig>
          This visualization shows the <Metric>164,720</Metric> most frequently-assigned texts in the Open Syllabus corpus, a database of <Metric>6,059,459</Metric> college course syllabi.
        </CopyBig>

        <CopyMedium>
          This plot approximates the structure of the underlying citation graph (node2vec &rarr; UMAP). <strong>Books that are close together in the layout are assigned in similar courses.</strong>
        </CopyMedium>

        <ExploreButton
          variant="contained"
          color="primary"
          disabled={!plotReady}
          onClick={close}>
          {plotReady ? 'Explore' : 'Loading'}
          {exploreIcon}
        </ExploreButton>

        <Details>
          <DetailsToggle onClick={toggleDetailsOpen}>
            {detailsIcon} Technical details
          </DetailsToggle>
          <Collapse in={detailsOpen}>
            <DetailsContent />
          </Collapse>
        </Details>

      </StyledDialogContent>
    </Dialog>
  )

}


export default Info;
