diff --git a/Gemfile b/Gemfile index 70b8a49fb..030abf9db 100644 --- a/Gemfile +++ b/Gemfile @@ -38,6 +38,10 @@ gem 'puma', '~> 2.14.0' gem 'logstasher', '~> 0.6' +gem 'aws-sdk', '~> 2' + +# gem 'mongoid_fulltext' + group :development do gem 'dotenv-rails' end diff --git a/Gemfile.lock b/Gemfile.lock index 1e5a1a39f..8f27022cb 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -52,6 +52,12 @@ GEM tzinfo (~> 0.3.37) addressable (2.3.8) arel (4.0.2) + aws-sdk (2.2.14) + aws-sdk-resources (= 2.2.14) + aws-sdk-core (2.2.14) + jmespath (~> 1.0) + aws-sdk-resources (2.2.14) + aws-sdk-core (= 2.2.14) bcrypt (3.1.10) better_errors (2.1.1) coderay (>= 1.0.0) @@ -126,6 +132,7 @@ GEM jbuilder (1.5.3) activesupport (>= 3.0.0) multi_json (>= 1.2.0) + jmespath (1.1.3) jquery-rails (3.1.2) railties (>= 3.0, < 5.0) thor (>= 0.14, < 2.0) @@ -303,6 +310,7 @@ PLATFORMS DEPENDENCIES actionpack-action_caching active_model_serializers + aws-sdk (~> 2) better_errors binding_of_caller browserify-rails (~> 0.9.1) diff --git a/app/assets/javascripts/components/app-router.cjsx b/app/assets/javascripts/components/app-router.cjsx index ba99b6ce9..ef4e10bde 100644 --- a/app/assets/javascripts/components/app-router.cjsx +++ b/app/assets/javascripts/components/app-router.cjsx @@ -11,6 +11,10 @@ Verify = require './verify' # TODO Group routes currently not implemented GroupPage = require './group-page' GroupBrowser = require './group-browser' +FinalSubjectSetBrowser = require './final-subject-set-browser' +FinalSubjectSetPage = require './final-subject-set-page' +FinalSubjectSetDownload = require './final-subject-set-download' +GenericPage = require './generic-page' Project = require 'models/project.coffee' @@ -74,16 +78,36 @@ class AppRouter name={workflow.name + '_entire_page'} /> } + + { # Project-configured pages: project.pages?.map (page, key) => } + + { if project.downloadable_data + + } + + pattern = new RegExp('#/[A-z]*#(.*)') selectedID = "#{window.location.hash}".match(pattern) @@ -141,21 
+164,11 @@ class AppRouter active: false heightStyle: "content" - navToggle:(e)-> - render: -> formatted_name = page.name.replace("_", " ") -
-

{formatted_name}

-
- { - if page.group_browser? && page.group_browser != '' -
- -
- } -
Last Update {page.updated_at}
-
+ base_key = page.key.split('/')[0] + nav = project.page_navs[base_key] + module.exports = AppRouter window.React = React diff --git a/app/assets/javascripts/components/final-subject-assertion.cjsx b/app/assets/javascripts/components/final-subject-assertion.cjsx new file mode 100644 index 000000000..1cf68b78f --- /dev/null +++ b/app/assets/javascripts/components/final-subject-assertion.cjsx @@ -0,0 +1,75 @@ +React = require 'react' +API = require '../lib/api' + +module.exports = React.createClass + displayName: 'FinalSubjectAssertion' + + propTypes: -> + assertion: React.PropTypes.object.isRequired + + getInitialState: -> + showingRegion: false + + toggleRegion: (e) -> + console.log "show: ", ! @state.showingRegion + @setState showingRegion: ! @state.showingRegion + + render: -> + + confidence = Math.round(100 * @props.assertion.confidence) + confidence_label = 'low' + confidence_label = 'med' if confidence >= 50 + confidence_label = 'high' if confidence >= 66 + confidence_label = 'max' if confidence == 100 + + status_label = @props.assertion.status.replace /_/, ' ' + +
+

{@props.assertion.name}

+ +
    + { for k of @props.assertion.data +
  • + { + cleaned_version = null + if @props.field && @props.field.value + cleaned_version = if (typeof @props.field.value) == 'object' then @props.field.value[k] else @props.field.value + null + } + {@props.assertion.data[k]} + { if cleaned_version && ('' + cleaned_version) != @props.assertion.data[k] + ( Interpretted as {if (typeof cleaned_version) == 'object' then cleaned_version.join(' x ') else cleaned_version } ) + } + { if k != 'value' + ({k.replace /_/g, ' '}) + } +
  • + } +
+
+
Confidence
+
{confidence}%
+
Status
+
{status_label}
+
Distinct Transcriptions
+
{@props.assertion.versions?.length || 0}
+
+ + { if @state.showingRegion + Hide {@props.project.term('mark')} + else + Show {@props.project.term('mark')} + } + + { + viewer_width = @props.assertion.region.width + scale = viewer_width / @props.assertion.region.width + s = + background: "url(#{@props.subject.location.standard}) no-repeat -#{Math.round(@props.assertion.region.x * scale)}px -#{Math.round(@props.assertion.region.y * scale)}px" + width: viewer_width + 'px' + height: (if @state.showingRegion then Math.round(@props.assertion.region.height * scale) else 0) + 'px' + classes = ['image-crop'] + classes.push 'showing' if @state.showingRegion +
+ } +
diff --git a/app/assets/javascripts/components/final-subject-set-browser.cjsx b/app/assets/javascripts/components/final-subject-set-browser.cjsx new file mode 100644 index 000000000..78fa1992a --- /dev/null +++ b/app/assets/javascripts/components/final-subject-set-browser.cjsx @@ -0,0 +1,194 @@ +React = require 'react' +{Navigation} = require 'react-router' +API = require '../lib/api' +Project = require 'models/project.coffee' +GenericButton = require('components/buttons/generic-button') +LoadingIndicator = require('components/loading-indicator') +Pagination = require('components/pagination') +GenericPage = require './generic-page' +FetchProjectMixin = require 'lib/fetch-project-mixin' + +module.exports = React.createClass + displayName: 'FinalSubjectSetBrowser' + + mixins: [Navigation, FetchProjectMixin] + + getInitialState:-> + entered_keyword: @props.query.keyword + selected_field: @props.query.field + searched_query: {} + fetching_keyword: null + current_page: @props.query.page ? 1 + more_pages: false + results: [] + project: null + + componentDidMount: -> + @checkQueryString() + + componentWillReceiveProps: (new_props) -> + @checkQueryString new_props + + checkQueryString: (props = @props) -> + if props.query.keyword + @fetch({keyword: props.query.keyword, field: props.query.field}, props.query.page) + + fetch: (query, page = 1) -> + return if ! 
@isMounted() + + if query.keyword != @state.searched_keyword || query.field != @state.selected_field || @props.current_page != page + + results = @state.results + results = [] if @state.searched_query?.keyword != query.keyword + @setState fetching_keyword: query.keyword, fetching_page: page, results: results, () => + per_page = 20 + params = + keyword: query.keyword + field: query.field + per_page: per_page + page: @state.fetching_page + + API.type('final_subject_sets').get(params).then (sets) => + @setState + results: sets + searched_query: + keyword: @props.query.keyword + field: @props.query.field + current_page: page + fetching_page: null + more_pages: sets?[0]?.getMeta('next_page') + fetching_keyword: null + + handleKeyPress: (e) -> + if @isMounted() + + if [13].indexOf(e.keyCode) >= 0 # ENTER: + @search e.target.value + + search: (keyword, search_field) -> + keyword = @state.entered_keyword # refs.search_input?.getDOMNode().value.trim() unless keyword? + field = @state.selected_field # @refs.search_field?.getDOMNode().value.trim() + + @transitionTo "final_subject_sets", null, {keyword: keyword, field: field} + + loadMore: -> + @fetch @state.searched_query, @state.current_page + 1 + + handleChange: (e) -> + @setState entered_keyword: e.target.value + + handleFieldSelect: (e) -> + @setState selected_field: e.target.value + + + renderPagination: -> + + + renderSearch: -> +
+

Preview the data by searching by keyword below:

+
+ { if @state.project.export_document_specs?[0]?.spec_fields + + } +
+ + +
+
+ + { if @state.fetching_keyword + + + else if @state.searched_query?.keyword && @state.results.length == 0 +

No matches yet for "{@state.searched_query.keyword}"

+ + else if @state.results.length > 0 +
+

Found {@state.results[0].getMeta('total')} matches

+ +
    + { for set in @state.results + url = "/#/#{@state.project.data_url_base}/browse/#{set.id}?keyword=#{@state.searched_query.keyword}&field=#{@state.searched_query.field ? ''}" + matches = [] + + safe_keyword = (w.replace(/\W/g, "\\$&") for w in @state.searched_query.keyword.toLowerCase().replace(/"/g,'').split(' ')).join("|") + safe_keyword = (c for c in safe_keyword).join ",?" + regex = new RegExp("(#{safe_keyword})", 'gi') + + # If a specific field searched, always show that: + if @state.searched_query?.field + term = set.search_terms_by_field[@state.searched_query.field]?.join("; ") + matches.push(field: @state.searched_query.field, term: term) if term + + # Otherwise show all fields that match + else + for k of set.search_terms_by_field + matches.push(field: k, term: v) for v in set.search_terms_by_field[k] when v.match(regex) + +
  • +
    + + + +
    +
    + { for m,i in matches[0...2] + + } +
    +
  • + } +
+ + { @renderPagination() if @state.results.length > 0 } +
+ } +
+ + + render: -> + return null if ! @state.project? + + data_nav = @state.project.page_navs[@state.project.data_url_base] + + +
+ +

Browse

+ + { if ! @state.project.downloadable_data +
+

Data Exports Not Available

+

Sorry, but public data exports are not enabled for this project yet.

+
+ + else +
+ { if ! @state.searched_query?.keyword +

Participants have made {@state.project.classification_count.toLocaleString()} contributions to {@state.project.title} to date. This project periodically builds a merged, anonymized dump of that data, which is made public here.

+ } + + { @renderSearch() } + +
+ } +
+
+ diff --git a/app/assets/javascripts/components/final-subject-set-download.cjsx b/app/assets/javascripts/components/final-subject-set-download.cjsx new file mode 100644 index 000000000..059803a9a --- /dev/null +++ b/app/assets/javascripts/components/final-subject-set-download.cjsx @@ -0,0 +1,39 @@ +React = require 'react' +FetchProjectMixin = require 'lib/fetch-project-mixin' +GenericPage = require './generic-page' + +module.exports = React.createClass + displayName: 'FinalSubjectSetDownload' + + mixins: [FetchProjectMixin] + + getInitialState: -> + project: null + + render: -> + return null if ! @state.project? + + data_nav = @state.project.page_navs['data'] + + + { if ! @state.project.downloadable_data +
+

Data Exports Not Available

+

Sorry, but public data exports are not enabled for this project yet.

+
+ else +
+

Download

+ +

Participants have made {@state.project.classification_count.toLocaleString()} contributions to {@state.project.title} to date. This project periodically builds a merged, anonymized dump of that data, which is made public here.

+ +

Download Latest Raw Data

+ +

For help interpreting the data, see Scribe WIKI on Data Exports.

+ +

To browse past releases and/or to be notified when new releases are made, you may wish to subscribe to the ATOM Feed of Data Releases

+ +
+ } +
+ diff --git a/app/assets/javascripts/components/final-subject-set-page.cjsx b/app/assets/javascripts/components/final-subject-set-page.cjsx new file mode 100644 index 000000000..5fb1b9949 --- /dev/null +++ b/app/assets/javascripts/components/final-subject-set-page.cjsx @@ -0,0 +1,146 @@ +React = require 'react' +API = require '../lib/api' +GenericPage = require './generic-page' +FetchProjectMixin = require 'lib/fetch-project-mixin' + +FinalSubjectAssertion = require('components/final-subject-assertion') + + +module.exports = React.createClass + displayName: 'FinalSubjectSetPage' + + mixins: [FetchProjectMixin] + + getInitialState:-> + set: null + tab: null + tabs: [] + + componentDidMount: -> + API.type("final_subject_sets").get(@props.params.final_subject_set_id).then (set) => + tabs = [] + tabs.push 'export-doc' if set.export_document + tabs.push 'source-metadata' if set.meta_data + tabs.push 'assertions' + @setState + set: set + tab: tabs[0] + tabs: tabs + + showExportDoc: -> + @showTab 'export-doc' + + showAssertions: -> + @showTab 'assertions' + + showTab: (which) -> + @setState tab: which + + + render: -> + return null if ! @state.set || ! @state.project + + data_nav = @state.project.page_navs[@state.project.data_url_base] + + +
+
+ + Back + + Download Item Raw Data + { if @state.set.export_document? && (display_field = @state.set.export_document.export_fields[0])? +

{display_field.name} {display_field.value}

+ else +

Record {@state.set.id}

+ } + + + + { if @state.tabs.length > 1 + + } + + { if @state.tab == 'export-doc' && @state.set.export_document +
+

These data points represent numerous individual classifications that have been merged and lightly cleaned up to adhere to {@state.project.title}'s data model.

+ + { for field,i in @state.set.export_document.export_fields + if field.assertion_ids + assertion = subject = null + for s in @state.set.subjects + for a in s.assertions + if field.assertion_ids.indexOf(a.id) >= 0 + assertion = a + subject = s + if assertion && subject +
+ +
+ } +
+ } + + { if @state.tab == 'assertions' +
+

These data points represent all distinct assertions made upon this {@props.project.term('subject set')} - without cleanup. Each assertion may represent several distinct contributions.

+
    + { for subject in @state.set.subjects +
  • +
      + { + # Sort assertions by ExportDocumentSpec field order: + field_name_order = (field.name for field in @props.project.export_document_specs[0].spec_fields) + assertions = subject.assertions.sort (a1,a2) -> + # If field name doesn't appear in spec, sort it last (i.e. index 1000): + ord1 = if field_name_order.indexOf(a1.name) >= 0 then field_name_order.indexOf(a1.name) else 1000 + ord2 = if field_name_order.indexOf(a2.name) >= 0 then field_name_order.indexOf(a2.name) else 1000 + if ord1 < ord2 + -1 + else + 1 + + null + } + { for assertion,i in assertions when assertion.name +
    • + +
    • + } +
    + +
  • + } +
+
+ } + + { if @state.tab == 'source-metadata' + +
+

This metadata was imported alongside the source images at the beginning of the project and may include high res source URIs and processing details.

+ +
+ { for k,v of @state.set.meta_data +
+
{k.split('_').map( (v) => v.capitalize() ).join(' ')}
+ { if v.match(/https?:\/\//) +
{v}
+ else +
{v}
+ } +
+ } +
+
+ } +
+
+
+ diff --git a/app/assets/javascripts/components/generic-page.cjsx b/app/assets/javascripts/components/generic-page.cjsx new file mode 100644 index 000000000..077c62abc --- /dev/null +++ b/app/assets/javascripts/components/generic-page.cjsx @@ -0,0 +1,75 @@ +React = require("react") +FetchProjectMixin = require 'lib/fetch-project-mixin' + +module.exports = React.createClass + displayName: "GenericPage" + + mixins: [FetchProjectMixin] + + getInitialState: -> + project: null + + getDefaultProps: -> + key: null + title: null + content: null + nav: null + footer: null + current_nav: location.hash + + propTypes: + title: React.PropTypes.string + content: React.PropTypes.string + nav: React.PropTypes.string + footer: React.PropTypes.string + + # Returns true if given nav link href appears to link to this page + isCurrentNavLink: (href) -> + # Known limitation: This will will assume equivalency of two URLs that don't have hashes + # But use of the nav assumes hashes. A nav item really shouldn't link to a different domain/ctrl endpoint + href.replace(/.*#/, '') == @props.current_nav.replace(/.*#/,'') + + componentDidMount: -> + # Find nav link matching @props.current_nav + matching = (el for el in $(React.findDOMNode(this)).find('.custom-page-nav li a') when @isCurrentNavLink($(el).attr('href')) ) + $(matching[0]).parent('li').addClass('current') if matching.length > 0 + + htmlContent: -> + content = @props.content + + replacements = + "project.classification_count": @state.project?.classification_count ? '__' + "project.latest_export.created_at": @state.project?.latest_export?.created_at ? '__' + "project.root_subjects_count": @state.project?.root_subjects_count ? '__' + "project.title": @state.project?.title ? 
'__' + + for pattern, replacement of replacements + pattern = new RegExp("{{#{pattern}}}", 'gi') + + # assume, if it's an int, we want to comma format it: + if typeof(replacement) == 'number' + replacement = replacement.toLocaleString() + # If it's a date, parse it and make it human: + if replacement.match /^\d{4}-\d{2}/ + replacement = moment(replacement, moment.ISO_8601).calendar() + + content = content.replace pattern, replacement + + marked(content) + + render: -> + +
+

{@props.title}

+
+ { if @props.nav +
+ } + { if @props.content? +
+ } + { @props.children if @props.children? } +
+
{@props.footer}
+
+ diff --git a/app/assets/javascripts/components/no-more-subjects-modal.cjsx b/app/assets/javascripts/components/no-more-subjects-modal.cjsx new file mode 100644 index 000000000..1e298e15f --- /dev/null +++ b/app/assets/javascripts/components/no-more-subjects-modal.cjsx @@ -0,0 +1,50 @@ +React = require 'react' +DraggableModal = require 'components/draggable-modal' +GenericButton = require 'components/buttons/generic-button' + +module.exports = React.createClass + displayName: 'NoMoreSubjectsModal' + + getDefaultProps: -> + header: 'Nothing more to do here' + + propTypes: + project: React.PropTypes.object.isRequired + header: React.PropTypes.string.isRequired + workflowName: React.PropTypes.string.isRequired + + render: -> + next_workflow = @props.project.workflowWithMostActives @props.workflowName + next_href = "/" + next_label = 'Continue' + + if next_workflow? + next_href = "/#/" + next_workflow.name + + else if @props.project.downloadable_data + next_href = "/#/data" + next_label = "Explore Data" + + } + > + { if next_workflow? +

+ Currently, there are no {@props.project.term('subject')}s for you to {@props.workflowName}. + Try {next_workflow.name.capitalize()} instead! +

+ + else +
+

There's nothing more to transcribe in {@props.project.title}!! 🎉 🎉 🎉 +

+

Thank you to all the amazing volunteers who worked on this project.

+ + { if @props.project.downloadable_data +

The {@props.project.root_subjects_count.toLocaleString()} records can be explored via the Data tab.

+ } +
+ } +
+ diff --git a/app/assets/javascripts/components/pagination.cjsx b/app/assets/javascripts/components/pagination.cjsx new file mode 100644 index 000000000..998508a10 --- /dev/null +++ b/app/assets/javascripts/components/pagination.cjsx @@ -0,0 +1,63 @@ +React = require 'react' + +module.exports = React.createClass + displayName: 'Pagination' + + getDefaultProps: -> + max_links: 12 + + pageUrl: (page) -> + base = location.href.replace /(&|\?)page=[^&]+/, '' + "#{base}#{if base.indexOf("?") >= 0 then '&' else '?'}page=#{page}" + + render: -> + # Build array of page numbers to show.. + + pages = [] + if @props.total_pages <= @props.max_links + # If fewer pages than max, show them all: + pages = [1..@props.total_pages] + + else + # Too many to show, so truncate.. + # Assuming we want three groups of truncated links (first few, last few, + # and a middle group centered around current page).. + chunk_size = @props.max_links / 3 - 1 + for p in [1..@props.total_pages] + # Add first few pages: + pages.push p if p <= chunk_size + # Add a middle group of pages around the current page: + pages.push p if Math.abs(@props.current_page - p) <= chunk_size/2 && pages.indexOf(p)<0 + # Bookend with last few pages: + pages.push p if p > @props.total_pages - chunk_size && pages.indexOf(p)<0 + + # Don't show anything if no usable links: + return null if pages.length < 2 + + page_links = [] + + # Add leading < link + page_links.push({label: "<", page: @props.prev_page, title: "Previous", disabled: false}) if @props.prev_page + + for page,i in pages + # Add divider if this page is the beginning of a chunk: + page_links.push({dotdotdot: true}) if i > 0 && pages[i-1] != page-1 + # Add page link: + page_links.push({label: page, page: page, title: "Page #{page}", disabled: page == @props.current_page}) + + # Add final > link + page_links.push({label: ">", page: @props.next_page, title: "Next", disabled: false}) if @props.next_page? + + +
    + { for link, i in page_links + if link.dotdotdot? +
  • + + else if link.disabled +
  • + + else +
  • + } +
diff --git a/app/assets/javascripts/lib/fetch-project-mixin.cjsx b/app/assets/javascripts/lib/fetch-project-mixin.cjsx new file mode 100644 index 000000000..c200d63f8 --- /dev/null +++ b/app/assets/javascripts/lib/fetch-project-mixin.cjsx @@ -0,0 +1,9 @@ +API = require './api' +Project = require 'models/project.coffee' + +module.exports = + componentDidMount: -> + API.type('projects').get('current').then (result) => + @setState project: new Project(result) + + diff --git a/app/assets/javascripts/models/project.coffee b/app/assets/javascripts/models/project.coffee index 39dfc7e37..f0eb372cd 100644 --- a/app/assets/javascripts/models/project.coffee +++ b/app/assets/javascripts/models/project.coffee @@ -3,8 +3,18 @@ class Project constructor: (obj) -> for k,v of obj @[k] = v + @data_url_base = 'data_new' term: (t) -> @terms_map[t] ? t + workflowWithMostActives: (not_named = '') -> + (w for w in @mostActiveWorkflows() when w.name != not_named)[0] + + mostActiveWorkflows: -> + @workflows.filter((w) -> w.active_subjects > 0 ).sort (w1, w2) -> + return -1 if w1.active_subjects > w2.active_subjects + 1 + + module.exports = Project diff --git a/app/assets/stylesheets/application.styl b/app/assets/stylesheets/application.styl index 727a45f5f..0680948cd 100644 --- a/app/assets/stylesheets/application.styl +++ b/app/assets/stylesheets/application.styl @@ -32,6 +32,8 @@ @import './groups.styl' @import './group-browser.styl' +@import './final-subject-set-browser.styl' + /* MARK STYLES */ @import './components/mark/point-tool.styl' @import './components/mark/rectangle-tool.styl' diff --git a/app/assets/stylesheets/common.styl b/app/assets/stylesheets/common.styl index d4af2d8ae..dfdf334bc 100644 --- a/app/assets/stylesheets/common.styl +++ b/app/assets/stylesheets/common.styl @@ -292,3 +292,42 @@ div.home-page margin-bottom 50px overflow: hidden min-height 800px + + +.custom-page-inner-wrapper + flexbox(flex) + min-height 300px + // overflow hidden // just to clear floats + 
+.custom-page-nav + flex(grow: 1) + min-width 200px + + ul + list-style none + padding 0 + + li + font-size 16px + line-height 16px + margin-left 0 + margin-bottom 15px + padding-left 15px + border-left 2px solid transparent + + a + color SECONDARY_NORMAL + text-decoration none + + &:hover + color SECONDARY_HOVER + + &.current + font-weight bold + border-left 2px solid MAIN_HIGHLIGHT + + + +custom-page-body + overflow hidden + flex(grow: 10) diff --git a/app/assets/stylesheets/final-subject-set-browser.styl b/app/assets/stylesheets/final-subject-set-browser.styl new file mode 100644 index 000000000..6ba29a8e1 --- /dev/null +++ b/app/assets/stylesheets/final-subject-set-browser.styl @@ -0,0 +1,238 @@ +.final-subject-set-browser + flex(grow: 1) + max-width 700px + + .pagination + list-style none + + li + display inline-block + margin-right 0.5em + line-height 1.2em + + a, span + @extends .standard-button + font-weight bold + color white + padding 0.2em 0.6em + + a + text-decoration none + + &.disabled + opacity 0.5 + + &.divider:after + content "..." 
+ + + .tabs + padding 0 + margin 0 + margin-top 20px + list-style none + border-bottom solid #bbb 1px + + li + display inline-block + border solid #bbb + border-width 1px 1px 0 1px + margin 0 0 0 10px + font-size 1.4em + border-radius 5px 5px 0 0 + + a + padding 10px 30px + text-decoration none + color gray + + &.active + background-color rgba(255,255,255,0.30) + + a + font-weight bold + color TERTIARY_NORMAL + + &.page-content + h2 + text-align left + pading-bottom 10px + + ul + list-style none + padding-left 0 + + .json-link + width auto + float right + margin-left 10px + + + // Search page: + + form + padding-bottom 1em + + input + vertical-align middle + + input#data-search + font-size 2em + margin-right 10px + + select + font-size 1.3em + + + .loading-indicator + width 100px + margin 100px auto + color black + position inherit + display block + + ul.results + li + display inline-block + margin 0 10px 10px 0 + width 330px + height 150px + overflow hidden + + a + color #2b3a42 + text-decoration none + + .image + width 160px + float left + + .matches + width 160px + float left + font-size 14px + + .match + padding-bottom 0.5em + line-height 1.5em + + .field + font-weight bold + + &:after + content ":" + + .term + padding-left 1em + + em + font-style unset + font-weight bold + color TERTIARY_NORMAL + + + // Set page: + .final-subject-set-page + img.standard-image + max-width 600px + + h3 + border-left none + padding 0 + line-height normal + font-size 28px + clear both + margin-bottom 0 + + ul.assertion-data, dl.assertion-properties + margin 4px 0 4px 10px + + ul.assertion-data + clear left + + li + color gray + + span.value + font-weight bold + color #2b3a42 + + span.cleaned-version + padding-left 1em + + span.data-key + margin-left 20px + + dl.assertion-properties + margin-left 10px + clear left + font-size 14px + + dt,dd + display inline + color gray + + dt + margin 0 2px 0 0 + &:after + content ":" + + dd + margin 0 20px 0 0 + + &.confidence + color red + + 
&.status-complete + color green + + &.status + color TERTIARY_NORMAL + + &.confidence-med + color TERTIARY_NORMAL + + &.confidence-max, &.confidence-high + color green + + &.confidence-low + color #FC2260 + + .show-region-link + font-size 12px + display block + margin-left 10px + text-decoration none + + + .image-crop + opacity 0.7 + margin-left 10px + max-width 700px + + transition: height 0.3s ease-out + + &.showing + border solid gray 1px + + + &:hover + opacity 1 + + a.back + font-size: 1.3em; + text-decoration: none; + + &:before + content "< " + + dl.source-metadata + dl + margin-top 10px + + dt + font-size 20px + + dd + margin-left 0 + margin-bottom 10px diff --git a/app/controllers/admin/data_controller.rb b/app/controllers/admin/data_controller.rb index 5509f670f..2eafd6459 100644 --- a/app/controllers/admin/data_controller.rb +++ b/app/controllers/admin/data_controller.rb @@ -1,28 +1,16 @@ class Admin::DataController < Admin::AdminBaseController def index - @num_complete = Subject.complete.count - @num_non_root = Subject.active_non_root.count - end - - def download - if params[:download_format] - redirect_to "#{admin_data_download_path}.#{params[:download_format]}?download_status=#{params[:download_status]}" - - else - - if params[:download_status] == 'complete' - @subjects = Subject.complete - respond_to do |format| - format.json {render json: CompleteSubjectsSerializer.new(@subjects)} - end - - else - @sets = SubjectSet.all - respond_to do |format| - format.json {render json: FinalDataSerializer.new(@sets)} + @project = Project.current + if request.post? 
+ if (proj = params[:project]) + if (v = proj[:downloadable_data]) + new_val = v == '1' + puts "updating project: #{new_val} because #{v}" + @project.update_attributes downloadable_data: new_val end end end - end + @export = FinalDataExport.most_recent.first + end end diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index b7395a642..624249485 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -10,6 +10,10 @@ def require_user! current_or_guest_user(create_if_missing = true) end + def get_bot_user_from_request(request) + BotUser.by_auth request.headers + end + # Get currently logged-in user, creating guest as indicated def current_or_guest_user(create_if_missing = false) if current_user diff --git a/app/controllers/classifications_controller.rb b/app/controllers/classifications_controller.rb index 6aa852983..f2e2bc94b 100644 --- a/app/controllers/classifications_controller.rb +++ b/app/controllers/classifications_controller.rb @@ -4,29 +4,64 @@ class ClassificationsController < ApplicationController def create - user = require_user! + # Is it a bot? + user = get_bot_user_from_request request - workflow_id = BSON::ObjectId.from_string params["classifications"]["workflow_id"] + user = require_user! if user.nil? + + workflow_id = params["classifications"]["workflow_id"] ? params["classifications"]["workflow_id"] : nil task_key = params["classifications"]["task_key"] annotation = params["classifications"]["annotation"] annotation = {} if annotation.nil? 
- started_at = params["classifications"]["metadata"]["started_at"] - finished_at = params["classifications"]["metadata"]["finished_at"] + + started_at = nil + finished_at = nil + if params["classifications"]["metadata"] + started_at = params["classifications"]["metadata"]["started_at"] + finished_at = params["classifications"]["metadata"]["finished_at"] + + else + started_at = finished_at = Time.new.strftime("%Y%m%dT%H%M%S%z") + end + subject_id = params["classifications"]["subject_id"] user_agent = request.headers["HTTP_USER_AGENT"] - @result = Classification.create( - workflow_id: workflow_id, - subject_id: subject_id, - location: location, + # If workflow not found by id, maybe it was specified by name? + if workflow_id.nil? && ! params["workflow"].nil? + workflow = Workflow.find_by name: params["workflow"]["name"] + workflow_id = workflow.id + end + + workflow_id = BSON::ObjectId.from_string workflow_id if ! workflow_id.nil? + + # If user is a bot, consider creating the subject on the fly: + if user.is_a?(BotUser) && subject_id.nil? && (standard_url = params["subject"]["location"]["standard"]) + subject_id = Subject.find_or_create_root_by_standard_url(standard_url).id + end + + h = { annotation: annotation, - started_at: started_at, - finished_at: finished_at, - user_agent: user_agent, + location: location, + subject_id: subject_id, task_key: task_key, - user: user - ) + workflow_id: workflow_id, + user_id: user.id + } + if (@result = Classification.find_by_props(h)).nil? 
+ @result = Classification.create( + workflow_id: workflow_id, + subject_id: subject_id, + location: location, + annotation: annotation, + started_at: started_at, + finished_at: finished_at, + user_agent: user_agent, + task_key: task_key, + user: user + ) + end render json: @result end diff --git a/app/controllers/final_data_controller.rb b/app/controllers/final_data_controller.rb new file mode 100644 index 000000000..8cc1b4b39 --- /dev/null +++ b/app/controllers/final_data_controller.rb @@ -0,0 +1,9 @@ +class FinalDataController < ApplicationController + before_filter :ensure_data_downloadable + + def ensure_data_downloadable + project = Project.current + return render text: 'Data is not yet publicly available for this Scribe project.', status: 404 if ! project.downloadable_data + end + +end diff --git a/app/controllers/final_data_exports_controller.rb b/app/controllers/final_data_exports_controller.rb new file mode 100644 index 000000000..fbd0057af --- /dev/null +++ b/app/controllers/final_data_exports_controller.rb @@ -0,0 +1,23 @@ +class FinalDataExportsController < FinalDataController + + def latest + puts "FinalDataExport.most_recent.first: #{FinalDataExport.most_recent.first.inspect}" + show FinalDataExport.most_recent.first + end + + def show(export = nil) + export = FinalDataExport.find(params[:id]) unless export + return render text: 'Not found.', status: 404 if export.nil? 
+ + redirect_to export.path + end + + def index + @exports = FinalDataExport.most_recent.limit(20) + + respond_to do |format| + format.atom + end + end + +end diff --git a/app/controllers/final_subject_sets_controller.rb b/app/controllers/final_subject_sets_controller.rb new file mode 100644 index 000000000..2166a8192 --- /dev/null +++ b/app/controllers/final_subject_sets_controller.rb @@ -0,0 +1,67 @@ +class FinalSubjectSetsController < FinalDataController + respond_to :json + + def show + @set = FinalSubjectSet.find params[:id] + respond_with FinalSubjectSetSerializer.new @set + end + + def index + per_page = get_int :per_page, 20, (0..50) + page = get_int :page, 1 + + field = params[:field] + keyword = params[:keyword] + + + @sets = Project.current.final_subject_sets.page(page).per(per_page) + if ! field.blank? && (field_spec = FinalSubjectSet.export_spec_fields.select { |f| f.name == field }.first) + + match_exact = ['numeric','monetary','date'].include? field_spec.format + if field_spec && field_spec.format + split = "(-|to)" + split = " #{split} " if field_spec.format == 'date' + if keyword.match /\w+ ?#{split} ?\w*/i + values = keyword.split(/#{split}/i) + values = [values.first, values.last] + values = parse_range values, field_spec.format + puts "values: #{values}" + + @sets = @sets.by_export_field_range(field, values) + + # specially handle searching by year: + elsif field_spec.format == 'date' && keyword.match(/^\d+$/) + values = parse_range [keyword,keyword], field_spec.format + @sets = @sets.by_export_field_range(field, values) + + # search by exact val: + else + value = parse_keyword keyword, field_spec.format + @sets = value.blank? ? [] : @sets.by_export_field(field, value, match_exact) + end + else + value = parse_keyword keyword, field_spec.format + @sets = value.blank? ? 
[] : @sets.by_export_field(field, value, match_exact) + end + + else + @sets = @sets.where({"$text" => {"$search" => keyword} } ) if keyword + end + + respond_with GenericResultSerializer.new(@sets).serializable_hash base_url: request.url + end + + def parse_range(values, format) + parsed = values.map { |v| Export::DocumentBuilder.apply_format v, format } + if format == 'date' + parsed[0] = Export::DocumentBuilder.apply_format("#{values.first}-01-01",format) if parsed.first.nil? + parsed[1] = Export::DocumentBuilder.apply_format("#{values.last}-12-31",format) if parsed.last.nil? + end + parsed + end + + def parse_keyword(value, format) + parsed = Export::DocumentBuilder.apply_format value, format + parsed + end +end diff --git a/app/controllers/projects_controller.rb b/app/controllers/projects_controller.rb index 4762c0951..76fe0f488 100644 --- a/app/controllers/projects_controller.rb +++ b/app/controllers/projects_controller.rb @@ -3,7 +3,12 @@ class ProjectsController < ApplicationController caches_action :index, :cache_path => "projects/index" + # TODO deprecate this nonsensical usage. Use /projects/current instead def index + current + end + + def current respond_with Project.current end @@ -12,14 +17,5 @@ def stats render :json => {:project => project, :stats => project.stats} end -=begin - def project_css - render text: Project.current.styles - end - - def project_js - render text: Project.current.custom_js - end -=end end diff --git a/app/models/bot_user.rb b/app/models/bot_user.rb new file mode 100644 index 000000000..abbf522c9 --- /dev/null +++ b/app/models/bot_user.rb @@ -0,0 +1,47 @@ +class BotUser < User + + AUTH_HEADER = 'HTTP_BOT_AUTH' + + # Create bot user with name + def self.create(name) + user = find_or_initialize_by name: name, role: 'bot' + token = '' + if ! user.persisted? + token = user.reset_token! + end + {user: user, token: token} + end + + # Immediately overwrite existing token with a new one + def reset_token! 
+ token = Devise.friendly_token[0,20] + self.password = self.password_confirmation = token + self.email = "#{name}@scribe" + save! validate: false + token + end + + def self.pack_auth_header(user_id, token) + [user_id, token].join ":" + end + + def self.unpack_auth_header(str) + str.split ":" + end + + # Given hash of headers, return bot user if a header authenticates + def self.by_auth(headers) + # No header? Fail. + return nil if headers[AUTH_HEADER].blank? + + # Fail if header doesn't have two values: + parts = unpack_auth_header headers[AUTH_HEADER] + return nil if parts.size != 2 + + # Get user by name and auth using token: + user = find parts[0] + return nil if ! user.valid_password? parts[1] + + user + end +end diff --git a/app/models/classification.rb b/app/models/classification.rb index 99f91f1f3..999f14eb6 100644 --- a/app/models/classification.rb +++ b/app/models/classification.rb @@ -11,6 +11,11 @@ class Classification field :finished_at field :user_agent + field :data_md5 + + before_create :generate_data_md5 + + belongs_to :workflow, :foreign_key => "workflow_id" belongs_to :user belongs_to :subject, foreign_key: "subject_id", inverse_of: :classifications @@ -37,7 +42,8 @@ def generate_new_subjects def check_for_retirement_by_classification_count(subject) if workflow.generates_subjects_method == "collect-unique" - if subject.classification_count >= workflow.generates_subjects_after + # Must divide number of classifications by the number of distinct generated subjects (otherwise 3 generated subjects may hang out 'inactive' in verify waiting for 3 additional classifications that will never come..) + if subject.classification_count / subject.secondary_subject_count >= workflow.generates_subjects_after subject.retire! 
end end @@ -86,9 +92,15 @@ def increment_subject_classification_count end if self.task_key == "flag_bad_subject_task" - subject.increment_flagged_bad_count_by_one - # Push user_id onto Subject.deleting_user_ids if appropriate - Subject.where({id: subject.id}).find_and_modify({"$addToSet" => {deleting_user_ids: user_id.to_s}}) + # If deleting user is creator, immediately change status to bad + if subject.created_solely_by?(user) + subject.bad! + + else + subject.increment_flagged_bad_count_by_one + # Push user_id onto Subject.deleting_user_ids if appropriate + Subject.where({id: subject.id}).find_and_modify({"$addToSet" => {deleting_user_ids: user_id.to_s}}) + end end if self.task_key == "flag_illegible_subject_task" @@ -110,6 +122,26 @@ def to_s "#{workflow_name} Classification (#{ ann.blank? ? task_key : ann})" end + def generate_data_md5 + props = { + annotation: annotation, + location: location, + subject_id: subject_id, + task_key: task_key, + workflow_id: workflow_id + } + self.data_md5 = self.class.data_md5_for_props(props) + end + + def self.find_by_props(props) + find_by data_md5: data_md5_for_props(props) + end + + def self.data_md5_for_props(props) + Digest::MD5.hexdigest(props.to_query) + end + + # Returns hash mapping distinct values for given field to matching count: def self.group_by_hour(match={}) agg = [] diff --git a/app/models/concerns/group_by_field.rb b/app/models/concerns/group_by_field.rb new file mode 100644 index 000000000..6ca51a415 --- /dev/null +++ b/app/models/concerns/group_by_field.rb @@ -0,0 +1,19 @@ +module GroupByField + extend ActiveSupport::Concern + + module ClassMethods + + # Returns hash mapping distinct values for given field to matching count: + def group_by_field(field, match={}) + puts "group #{collection.inspect} by #{field}" + agg = [] + agg << {"$match" => match } if match + agg << {"$group" => { "_id" => "$#{field.to_s}", count: {"$sum" => 1} }} + collection.aggregate(agg).inject({}) do |h, p| + h[p["_id"]] = p["count"] 
+ h + end + end + + end +end diff --git a/app/models/export/document.rb b/app/models/export/document.rb new file mode 100644 index 000000000..3c861d668 --- /dev/null +++ b/app/models/export/document.rb @@ -0,0 +1,36 @@ +class Export::Document + include Mongoid::Document + + field :name, type: String + + belongs_to :spec, class_name: 'Export::Spec::Document' + embeds_many :export_fields, class_name: 'Export::DocumentField' + embedded_in :final_subject_set + + def self.from_set(set, specs) + specs.each do |spec| + return Export::DocumentBuilder.new(set, spec).export_document + end + end + + def data + export_fields.inject({}) do |h, f| + if h[f.name] + h[f.name] = [h[f.name]] if ! h[f.name].is_a?(Array) + h[f.name] << f.data + else + h[f.name] = f.data + end + h + end + end + + def to_s + ret = [] + ret << "#{spec.name}" + export_fields.each do |field| + ret << " #{field.to_s(2)}" + end + ret.join "\n" + end +end diff --git a/app/models/export/document_builder.rb b/app/models/export/document_builder.rb new file mode 100644 index 000000000..9a7310a99 --- /dev/null +++ b/app/models/export/document_builder.rb @@ -0,0 +1,185 @@ +class Export::DocumentBuilder + + def initialize(set, spec) + @set = set + @spec = spec + end + + def export_document + doc = Export::Document.create name: @spec.name, final_subject_set: @set, spec: @spec + @spec.spec_fields.each do |field_spec| + fields = fields_for_field_spec(field_spec) + doc.export_fields += fields if ! fields.blank? + end + if doc.export_fields.size < 3 + puts "Insufficient fields found in final-subject-set #{@set.id}: #{@set.subjects.first.location['standard']}" + nil + + else + doc + end + end + + def fields_for_field_spec(spec, base_assertion=nil) + if ! spec.repeats + best = best_for_field_spec(spec, base_assertion) + [best] if ! best.nil? + else + all_for_field_spec(spec, base_assertion) + end + end + + def best_for_field_spec(spec, base_assertion=nil) + all = all_for_field_spec(spec, base_assertion) + all.first if ! 
all.nil? + end + + def all_for_field_spec(spec, base_assertion=nil) + assertions = assertions_for_field_spec(spec, base_assertion).sort_by { |a| - a.confidence } + # puts "[Nothing found for #{spec.name}...]" if assertions.blank? + return nil if assertions.blank? + + fields = assertions.map do |assertion| + if ! spec.sub_fields.empty? + # puts "parsing out #{spec.name}...." + field = Export::DocumentField.new name: spec.name + spec.sub_fields.each do |field_spec| + # puts " parsing out #{field_spec.name}...." + fields = fields_for_field_spec field_spec, assertion + field.sub_fields += fields if ! fields.blank? + end + field + + else + clean_val = value_for_assertion assertion, spec.format, spec.format_options + Export::DocumentField.new name: spec.name, value: clean_val, original_value: assertion.data, assertion_ids: [assertion.id] + end + end + + fields.uniq do |field| + field.data + end + end + + def value_for_assertion(assertion, format=nil, format_options) + v = assertion.data + v = v["value"] if ! v["value"].nil? + v = self.class.apply_format(v, format, format_options) if ! format.nil? + v + end + + def assertions_for_field_spec(spec, base_assertion=nil) + # @doc["subjects"].first["assertions"].select { |a| a["name"] == name } + # TODO add assertion.subject_id so that we can do this: + subjects = base_assertion.nil? ? @set.subjects : [base_assertion.final_subject] + # in the meantime we'll just do this: + # subjects = @set.subjects + subjects.map do |subject| + assertions = subject.assertions + # puts "selecting within region: #{base_assertion.region}" if ! base_assertion.nil? + assertions = assertions.select { |assertion| assertion.region == base_assertion.region } if ! base_assertion.nil? + assertions = assertions.select { |a| a.name == (spec.select.nil? ? 
spec.name : spec.select ) } + assertions + end.flatten + end + + def self.apply_format(value, format, options=nil) + # puts "apply format: #{format} to #{value.inspect}" + case format + when 'date' + parse_date(value, options) + when 'address' + parse_address(value) + when 'monetary' + parse_monetary(value) + when 'dimensions' + parse_dimensions(value) + when 'numeric' + parse_numeric value + else + # puts "it's a hash? #{format.inspect}" + if value.is_a?(Hash) && format.is_a?(Hash) + # puts "it's a hash: #{format.inspect}" + ret = {} + value.keys.each do |k| + ret[k] = apply_format(value[k], format[k], options) + end + ret + else + value + end + end + end + + def self.parse_numeric(value) + return nil if ! value.match /\d/ + v = value.gsub(/,|\$|\.(-|\d{2}$)?/, '').to_i + v + end + + # Pull arbitrary number of English system dimensions from string + def self.parse_dimensions(value) + dims = [] + value.split(/x/).each do |v| + v.strip! + fract = 0 + # If there's a fraction... + fract_reg = / (\d+)\/(\d+)$/ + if (m = v.match(fract_reg)) + fract = m[1].to_f / m[2].to_f + v.sub! fract_reg, '' + end + # If inches given as [FEET].[INCHES] or [FEET] [INCHES]" .. + inches_reg = /(\.(\d+)| (\d+)")$/ + if (m = v.match(inches_reg)) + # This means previous fract was inches: (e.g. 1/2 inch) + fract /= 12 + # puts "summing fact: #{fract} + (#{m[2].to_f / 12})" + fract += m[2].to_f / 12 + v.sub! inches_reg, '' + end + dims << v.to_f + fract + end + dims + end + + def self.parse_monetary(value) + return nil if ! value.match /\d/ + v = value.gsub(/,|\$|\.(-|\d{2}$)?/, '').to_f + v + end + + def self.parse_date(value, options) + ret = nil + begin + ret = Date.parse(value) + rescue ArgumentError + puts "invalid date: #{value}" + end + + # Override default year expansion if a target range is configured and computed date is outside range: + # e.g. 
if options["range"] == [1850,1950], `16 should default to 1916, not 2016 + # Known issue: range should be a 100 yr span (or smaller), because otherwise century may be ambiguous + if ! ret.nil? && ! options.nil? && options["range"] && ret.year > options["range"].last + range = options["range"] + # Get two digit year: + partial_year = ret.year % 100 + # Round-down range to decades (e.g. [1800,1900]) + decades = range.map { |r| r - (r % 100) } + # See which of the (presumably 2) decades places the partial_year within range: + corrected_year = partial_year + decades.first > range.first ? decades.first + partial_year : decades.last + partial_year + # Rebuild date using corrected_year: + ret = Date.new corrected_year, ret.month, ret.day + end + + ret + end + + def self.parse_address(value) + value = value.dup + value.gsub! /^no\.? /i, '' + value + end + + +end diff --git a/app/models/export/document_field.rb b/app/models/export/document_field.rb new file mode 100644 index 000000000..9eb689c46 --- /dev/null +++ b/app/models/export/document_field.rb @@ -0,0 +1,33 @@ +class Export::DocumentField + include Mongoid::Document + + embedded_in :export_document + + field :name, type: String + field :value + field :original_value + field :assertion_ids, type: Array + + has_one :spec, class_name: 'Export::Spec::DocumentField' + embeds_many :sub_fields, class_name: 'Export::DocumentField' + + def data + if sub_fields.empty? + value + else + sub_fields.inject({}) do |h, f| + h[f.name] = f.data + h + end + end + end + + def to_s(indent=0) + if ! sub_fields.empty? 
+ "#{name}:\n" + (" " * indent) + sub_fields.map { |f| f.to_s(indent+1) }.join("\n" + (" " * indent)) + + else + "#{name}: #{value} (orig \"#{original_value}\")" # [assertion(s) #{assertion_ids}]" + end + end +end diff --git a/app/models/export/spec/document.rb b/app/models/export/spec/document.rb new file mode 100644 index 000000000..88f14ea7c --- /dev/null +++ b/app/models/export/spec/document.rb @@ -0,0 +1,16 @@ +class Export::Spec::Document + include Mongoid::Document + + field :name, type: String + + embeds_many :spec_fields, class_name: 'Export::Spec::DocumentField' + embedded_in :project + + def self.from_hash(h, project) + inst = self.new project: project, name: h['name'] + inst.spec_fields = h['spec_fields'].map do |h| + Export::Spec::DocumentField.from_hash h, inst + end + inst + end +end diff --git a/app/models/export/spec/document_field.rb b/app/models/export/spec/document_field.rb new file mode 100644 index 000000000..54cd97064 --- /dev/null +++ b/app/models/export/spec/document_field.rb @@ -0,0 +1,26 @@ +class Export::Spec::DocumentField + include Mongoid::Document + + field :name, type: String + field :select, type: String + field :format # string, monetary, address, {} + field :format_options, type: Hash # e.g. "format_options": {"range": [1850,1950]} + field :repeats, type: Boolean + embeds_many :sub_fields, class_name: 'Export::Spec::DocumentField' + embedded_in :export_document_spec, class_name: 'Export::Spec::Document' + embedded_in :export_document_spec_field, class_name: 'Export::Spec::DocumentField' + + def to_s + name + (select.nil? ? '' : " (select: \"#{select}\")") + end + + def self.from_hash(h, doc_spec, parent_field=nil) + inst = self.new export_document_spec: doc_spec, name: h['name'], select: h['select'], format: h['format'], format_options: h['format_options'], repeats: h['repeats'] + if ! h['sub_fields'].blank? 
+ h['sub_fields'].each do |sub_h| + inst.sub_fields << from_hash(sub_h, nil, inst) + end + end + inst + end +end diff --git a/app/models/final_data_export.rb b/app/models/final_data_export.rb new file mode 100644 index 000000000..f85c56a14 --- /dev/null +++ b/app/models/final_data_export.rb @@ -0,0 +1,11 @@ +class FinalDataExport + include Mongoid::Document + include Mongoid::Timestamps + + belongs_to :project + field :path, type: String + field :num_final_subject_sets, type: Integer + + scope :most_recent, -> { order(updated_at: -1) } + +end diff --git a/app/models/final_subject.rb b/app/models/final_subject.rb new file mode 100644 index 000000000..b045897c6 --- /dev/null +++ b/app/models/final_subject.rb @@ -0,0 +1,116 @@ +class FinalSubject + include Mongoid::Document + + field :type, type: String + field :location, type: Hash + field :status, type: String + field :width, type: Integer + field :height, type: Integer + field :meta_data, type: Hash + field :data, type: Hash + field :classifications_breakdown, type: Hash + field :flags, type: Hash + + belongs_to :subject + embedded_in :final_subject_set, inverse_of: :subjects + embeds_many :assertions, class_name: 'FinalSubjectAssertion' + + def fulltext_terms + assertions.select { |assertion| ! assertion.data.blank? && assertion.created_in_workflow != 'mark' }.map { |assertion| assertion.data.values }.select { |v| ! v.empty? } + end + + def fulltext_terms_by_field + assertions.select { |assertion| ! assertion.data.blank? && assertion.created_in_workflow != 'mark' }.inject({}) do |h, a| + field_name = a.name.blank? ? '_' : a.name + h[field_name] = [] if h[field_name].nil? + h[field_name] += a.data.values.select { |v| ! v.empty? } + h + end + end + + def self.create_from_subject(subject) + inst = self.new subject: subject + [:type, :location, :status, :width, :height, :meta_data].each do |p| + inst.send("#{p}=", subject.send(p)) + end + + inst.build_assertions! + # inst.build_classifications_breakdown! 
+ # inst.build_data! + + inst + end + + def build_data! + distinct = assertions.inject({}) do |h, assertion| + if assertion.created_in_workflow != 'mark' + h[assertion.task_key] = [] if h[assertion.task_key].nil? + data = assertion.data + data = data["values"].first if ! data["values"].nil? + data = data["value"] if data["value"] + stmt = {value: data, label: assertion.instructions['transcribe']} + has_data = ! data.blank? + has_data &= ! data.values.select { |v| ! v.blank? }.empty? if data.is_a? Hash + h[assertion.task_key] << stmt if has_data && ! h[assertion.task_key].include?(stmt) + end + h + end + self.data = distinct + end + + def build_assertions! + assertions.destroy_all + + flattened_subjects(subject.child_subjects).each do |s| + assertions << FinalSubjectAssertion.create_from_subject(s[:subject], s[:parents]) + end + + self + end + + def build_classifications_breakdown! + all_classifications = [] + @all_subjects.each do |s| + all_classifications += s.classifications + end + self.classifications_breakdown = all_classifications.inject({}) { |h, c| h[c.task_key] ||= 0; h[c.task_key] += 1; h } + self.classifications_breakdown[:total] = subject.classifications.count + end + + def flags + { + complete: flagged_for_retirement, + bad: { + votes_in_favor: subject.flagged_bad_count || 0 + } + } + end + + def flagged_for_retirement + votes = subject.number_of_completion_assessments + h = { + votes_in_favor: subject.retire_count || 0, + total_votes: votes, + } + h[:percentage_in_favor] = subject.retire_count / votes.to_f if ! subject.retire_count.nil? && votes > 0 + h + end + + def flattened_subjects(subjects, parents = []) + @all_subjects ||= [] + @all_subjects += subjects + + ret = [] + subjects.each do |s| + next if ! s.parent_classifications.empty? 
&& s.parent_classifications.limit(1).first.task_key == 'completion_assessment_task' + + if s.child_subjects.size > 0 + ret += flattened_subjects(s.child_subjects, parents + [s]) + + else + ret << {subject: s, parents: parents} if s.status != 'bad' + end + end + ret + end +end diff --git a/app/models/final_subject_assertion.rb b/app/models/final_subject_assertion.rb new file mode 100644 index 000000000..592c5d174 --- /dev/null +++ b/app/models/final_subject_assertion.rb @@ -0,0 +1,124 @@ +class FinalSubjectAssertion + include Mongoid::Document + + field :name, type: String + field :status, type: String + field :created_in_workflow, type: String + field :confidence, type: Float + field :data, type: Hash + field :versions, type: Array + field :region, type: Hash + field :task_key, type: String + field :instructions, type: Hash + + belongs_to :root_subject, class_name: "Subject" + + embedded_in :final_subject, inverse_of: :assertions + + def self.create_from_subject(subject, parents) + inst = new + + inst.name = subject.export_name + inst.status = status_for_subject(subject) + inst.created_in_workflow = subject.parent_workflow.nil? ? nil : subject.parent_workflow.name + inst.confidence = confidence_for_subject(subject) + inst.data = data_for_subject(subject) + inst.versions = classifications_for_subject(subject) + inst.region = region_for_subject(subject) + inst.task_key = subject.parent_classifications.empty? ? nil : subject.parent_classifications.limit(1).first.task_key + inst.instructions = instructions_for_subject(subject, parents) + + inst + end + + def self.classifications_for_subject(subject) + # Hack to show all distinct classifications with counts for terminal subjects being transcribed: + # if object[:subject].parent_workflow.name == 'transcribe' + + annotations_with_confidence subject if ! subject.parent_workflow.nil? 
&& subject.parent_workflow.name != 'mark' + end + + def self.instructions_for_subject(subject, parents) + ret = {} + + parents.each do |s| + next if s.parent_workflow.nil? + + if s.parent_workflow.name == 'mark' && subject.region && subject.region[:label] + ret[s.parent_workflow.name] = subject.region[:label] + + else + ret[s.parent_workflow.name] = s.parent_workflow_task.instruction + end + end + ret[subject.parent_workflow.name] = subject.parent_workflow_task.instruction if ! subject.parent_workflow.nil? + ret + end + + def self.region_for_subject(subject) + region = subject.region + return nil if region.nil? + + # not important: + region.delete 'color' + + # Translate toolName to generic 'shape' name: + region[:shape] = case region[:toolName] + when 'rectangleTool','rowTool' then 'rectangle' + when 'pointTool' then 'point' + end + region.delete 'toolName' + + region + end + + def self.data_for_subject(subject) + data = nil + if ['complete','retired'].include? subject.status + data = subject.data + else + cl = annotations_with_confidence(subject).first + data = cl.nil? ? nil : cl[:data] + end + data = data['values'].first if data && data['values'] + + data + end + + def self.confidence_for_subject(subject) + if subject.status == 'complete' + 1 + elsif subject.status == 'retired' + 1 + else + annotations_with_confidence(subject).map { |a| a[:confidence] }.max + end + end + + def self.status_for_subject(subject) + return nil if subject.parent_workflow.nil? + + return 'complete' if subject.status == 'complete' + + if subject.parent_workflow.name == 'transcribe' + return 'awaiting_transcriptions' if subject.status == 'inactive' + return 'awaiting_votes' if subject.status == 'active' + + elsif subject.parent_workflow.name == 'verify' + return 'awaiting_votes' if subject.status == 'inactive' + end + + subject.status + end + + + def self.annotations_with_confidence(subject) + num_votes = [subject.parent_workflow.nil? ? 
3 : subject.parent_workflow.generates_subjects_after, subject.parent_classifications.count].max + grouped = subject.parent_classifications.inject({}) { |h, c| h[c.annotation] ||= 0; h[c.annotation] += 1; h } + classifications_by_annotation = subject.parent_classifications.inject({}) { |h, c| h[c.annotation] ||= []; h[c.annotation] << {created: c.created_at, user_id: c.user_id, duration: c.finished_at.to_time - c.started_at.to_time, user_id: c.user_id.to_s }; h } + grouped = grouped.inject([]) { |a,(annotation,count)| a << {data: annotation, votes: count, confidence: count.to_f / num_votes, instances: classifications_by_annotation[annotation] }; a } + grouped = grouped.sort_by { |a| - a[:confidence] } + grouped + end + +end diff --git a/app/models/final_subject_set.rb b/app/models/final_subject_set.rb new file mode 100644 index 000000000..95d728b65 --- /dev/null +++ b/app/models/final_subject_set.rb @@ -0,0 +1,120 @@ +class FinalSubjectSet + include Mongoid::Document + include Mongoid::Timestamps + + scope :by_export_field, -> (name, value, exact) do + where({ + "export_document.export_fields" => { + '$elemMatch' => { + name: name, + value: ( exact ? value : { "$regex" => /#{value}/i } ) + } + } + }) + end + + scope :by_export_field_range, -> (name, values) do + m = { } + m["$gte"] = values.first if ! values.first.nil? + m["$lte"] = values.last if ! values.last.nil? 
+ where({ + "export_document.export_fields" => { + '$elemMatch' => { + name: name, + value: m + } + } + }) + end + + belongs_to :project + belongs_to :subject_set + field :name, type: String + field :meta_data, type: Hash + + field :search_terms + field :search_terms_by_field + + index({"subjects.assertions.confidence" => 1}, {background: true}) + index({"subjects.assertions.task_key" => 1}, {background: true}) + index({"subject_set_id" => 1}, {background: true}) + index({"project_id" => 1}, {background: true}) + + index({"search_terms" => "text"}) + + [:total, :complete, :awaiting_votes, :in_progress, :awaiting_transcriptions].each do |field| + index({"subjects.assertions_breakdown.all_workflows.#{field}" => 1}, {background: true}) + end + + embeds_many :subjects, class_name: 'FinalSubject' + embeds_one :export_document, class_name: "Export::Document" + + def build_search_terms + update_attributes({ + search_terms: compute_fulltext_terms, + search_terms_by_field: compute_fulltext_terms_by_field + }) + end + + def self.export_spec_fields + Project.current.export_document_specs.map do |spec| + spec.spec_fields + end.flatten + end + + def build_export_document + if ! Project.current.export_document_specs.blank? + self.export_document = Export::Document.from_set self, Project.current.export_document_specs + else + puts "No export_document_specs configured for #{Project.current.title}" + end + end + + def compute_fulltext_terms + compute_fulltext_terms_by_field.values.flatten.uniq + end + + def compute_fulltext_terms_by_field + subjects.map { |subject| subject.fulltext_terms_by_field }.inject({}) do |h, terms| + terms.each do |(k,vs)| + h[k] = [] if h[k].nil? + h[k] += vs + end + h + end + end + + def self.assert_for_set(set, rebuild=false) + # If final_subject_set record was built after most recent generated subject, consider skipping + if ! 
rebuild && (final_ss = find_by(subject_set:set)) + subjs_updated = set.subjects.max(:updated_at) + return if final_ss.updated_at > subjs_updated + end + inst = find_or_create_by subject_set: set + inst.project = set.project + inst.meta_data = set.meta_data + inst.update_subjects + inst.build_search_terms + inst.build_export_document + puts "Saving final subject set: #{inst.id}" + inst.save! + end + + def update_subjects + + subjects.destroy_all + + subject_set.subjects.root.each do |subject| + subjects << FinalSubject.create_from_subject(subject) + end + end + + def self.rebuild_indexes(for_project) + collection.indexes.drop unless self.count == 0 # If no records yet saved, moped will error when dropping indexes + for_project.export_names.each do |(key,name)| + index({"search_terms_by_field.#{key}" => 1}, {background: true}) + index({"export_document.export_fields.name" => 1, "export_document.export_fields.value" => 1}) + end + create_indexes + end +end diff --git a/app/models/project.rb b/app/models/project.rb index 6bfc3e58a..4f97f036c 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -16,12 +16,13 @@ class Project field :scientists, type: Array, default: [] field :developers, type: Array, default: [] field :pages, type: Array, default: [] + field :page_navs, type: Hash, default: {} field :menus, type: Hash, default: {} field :partials, type: Hash, default: {} - field :logo, type: String - field :background, type: String - field :favicon, type: String - field :forum, type: Hash + field :logo, type: String, default: nil + field :background, type: String, default: nil + field :favicon, type: String, default: nil + field :forum, type: Hash, default: nil field :feedback_form_url, type: String field :discuss_url, type: String field :blog_url, type: String @@ -29,12 +30,13 @@ class Project field :styles, type: String field :custom_js, type: String field :admin_email, type: String - field :team_emails, type: Array + field :team_emails, type: Array, 
default: [] field :metadata_search, type: Hash field :tutorial, type: Hash field :terms_map, type: Hash, default: {} # Hash mapping internal terms to project appropriate terms (e.g. 'group'=>'ship') field :status, type: String, default: 'inactive' - field :analytics, type: Hash + field :analytics, type: Hash, default: nil + field :downloadable_data, type: Boolean # 10.27.15 until we can sort out a better time to call this method, lets comment it out. include CachedStats @@ -44,6 +46,10 @@ class Project has_many :subject_sets has_many :workflows, dependent: :destroy, order: "order ASC" has_many :subjects + has_many :final_subject_sets + has_many :final_data_exports + + embeds_many :export_document_specs, class_name: "Export::Spec::Document" scope :most_recent, -> { order(updated_at: -1) } scope :active, -> { where(status: 'active') } @@ -63,6 +69,21 @@ def self.current active.first end + # get Distinct export_names from all workflow_tasks + def export_names + workflows.inject([]) do |a, w| + a += w.tasks.map { |t| t.export_name } + + end.select do |n| + ! n.nil? 
+ + end.inject({}) do |h, name| + key = name.gsub(' ', '-').gsub(/[^A-Za-z0-9-]/, '') + h[key] = name + h + end + end + def calc_stats # amount of days to calculate statistics for range_in_days = 7 diff --git a/app/models/subject.rb b/app/models/subject.rb index f309300b2..e6f726a6e 100644 --- a/app/models/subject.rb +++ b/app/models/subject.rb @@ -25,7 +25,7 @@ class Subject field :type, type: String, default: "root" #options: "root", "secondary" field :status, type: String, default: "active" #options: "active", "inactive", "bad", "retired", "complete", "contentious" - field :meta_data, type: Hash + field :meta_data, type: Hash, default: {} field :classification_count, type: Integer, default: 0 field :random_no, type: Float field :secondary_subject_count, type: Integer, default: 0 @@ -57,6 +57,7 @@ class Subject belongs_to :group belongs_to :parent_subject, :class_name => "Subject", :foreign_key => "parent_subject_id" belongs_to :subject_set, :class_name => "SubjectSet", :foreign_key => "subject_set_id" + belongs_to :project has_many :child_subjects, :class_name => "Subject" has_many :classifications, inverse_of: :subject @@ -74,7 +75,12 @@ class Subject index({"type" => 1, "subject_set_id" => 1}, {background: true}) # Index for fetching child subjects for a parent subject, optionally filtering by region NOT NULL index({parent_subject_id: 1, status: 1, region: 1}) - + + def created_solely_by?(user) + created_by = created_by_user_id == user.id.to_s + created_by ||= creating_user_ids.size == 1 && creating_user_ids.first == user.id.to_s + created_by + end def thumbnail location['thumbnail'].nil? ? location['standard'] : location['thumbnail'] @@ -117,6 +123,13 @@ def parent_workflow_task end end + def export_name + return nil if parent_workflow.nil? + + transcribe_subject = parent_workflow.name == 'transcribe' ? 
self : parent_subject + transcribe_subject.parent_workflow_task.export_name if transcribe_subject && transcribe_subject.parent_workflow_task + end + # find all the classifications for subject where task_key == compleletion_assesment_task # calculate the percetage vote for retirement (pvr) # if pvr is equal or greater than retire_limit, set self.status == retired. @@ -131,7 +144,7 @@ def check_flagged_bad_count # calculate the percetage vote for retirement (pvr) # if pvr is equal or greater than retire_limit, set self.status == retired. def check_retire_by_vote - assesment_classifications = classifications.where(task_key: "completion_assessment_task").count + assesment_classifications = number_of_completion_assessments if assesment_classifications > 2 percentage_for_retire = retire_count / assesment_classifications.to_f if percentage_for_retire >= workflow.retire_limit @@ -141,6 +154,10 @@ def check_retire_by_vote end end + def number_of_completion_assessments + classifications.where(task_key: "completion_assessment_task").count || 0 + end + def bad! status! 'bad' @@ -150,7 +167,6 @@ def bad! def retire! return if status == "bad" - return if classifying_user_ids.length < workflow.retire_limit status! 'retired' subject_set.subject_completed_on_workflow(workflow) if ! workflow.nil? @@ -174,6 +190,10 @@ def calculate_most_popular_parent_classification buckets.map { |(k,v)| {ann: k, percentage: v.to_f / parent_classifications.count } }.first end + def parent_workflow + parent_classifications.limit(1).first.workflow if ! parent_classifications.empty? + end + def to_s "#{status != 'active' ? "[#{status.capitalize}] " : ''}#{workflow.nil? ? 'Final' : workflow.name.capitalize} Subject (#{type})" @@ -204,6 +224,34 @@ def self.group_by_field_for_group(group, field, match={}) end + def self.find_or_create_root_by_standard_url(standard_url) + subject = Subject.find_by type: 'root', "location.standard" => standard_url + if subject.nil? 
+ subject = Subject.create_root_for_url standard_url + end + subject + end + + def self.create_root_for_url(standard_url) + + require 'fastimage' + width, height = FastImage.size(standard_url,:raise_on_failure=>false, :timeout=>10.0) + + subject = Subject.create({ + type: 'root', + subject_set: SubjectSet.create({project: Project.current, group: Project.current.groups.first, state: 'active'}), + location: { + standard: standard_url + }, + width: width, + height: height + }) + subject.workflow = Workflow.find_by name: 'mark' + subject.activate! + subject + end + + private def status!(status) diff --git a/app/models/subject_generation_method.rb b/app/models/subject_generation_method.rb index 9faabbbcb..caf5d9dce 100644 --- a/app/models/subject_generation_method.rb +++ b/app/models/subject_generation_method.rb @@ -47,6 +47,10 @@ def subject_attributes_from_classification(classification) if (label = task.tool_label(classification)) region[:label] = label end + # If region.color not passed from client, derive it from workflow_task tool config: + if ! region[:color] && task.sub_tool_config(classification) + region[:color] = task.sub_tool_config(classification)[:color] + end { parent_subject: classification.subject, diff --git a/app/models/subject_generation_methods/collect_unique.rb b/app/models/subject_generation_methods/collect_unique.rb index b0c685f8b..ebfc029db 100644 --- a/app/models/subject_generation_methods/collect_unique.rb +++ b/app/models/subject_generation_methods/collect_unique.rb @@ -34,7 +34,8 @@ def process_classification(classification) if num_parent_classifications >= classification.workflow.generates_subjects_after # Get number of distinct classifications: - num_vals = classification.child_subject.data['values'].nil? ? -1 : classification.child_subject.data['values'].size + # num_vals = classification.child_subject.data['values'].nil? ? 
-1 : classification.child_subject.data['values'].size + num_vals = atts[:data]['values'].size # Where will this generated subject appear, if anywhere? next_workflow = classification.child_subject.workflow @@ -49,13 +50,13 @@ def process_classification(classification) verify_method = next_workflow.generates_subjects_method # If next workflow's generation method is most-popular and everyone transcribed the same thing, auto upgrade to 'complete': - if num_vals == 1 && verify_method == 'most-popular' + # (but only if num_parent_classifications > 1) + if num_vals == 1 && verify_method == 'most-popular' && num_parent_classifications > 1 atts[:status] = 'complete' # .. Otherwise, activate the generated subject into the next workflow: else - classification.child_subject.activate! - atts.delete :status + atts[:status] = 'active' end end end @@ -68,9 +69,11 @@ def process_classification(classification) atts[:creating_user_ids] ||= [] classification.child_subject.creating_user_ids.push classification.user_id - # puts "Saving atts to classification: #{atts.inspect}" classification.child_subject.update_attributes atts + # Now that child subj is saved (with a parent subject_set) Fire activate hooks if activating: + classification.child_subject.activate! 
if atts[:status] == 'active' + classification.child_subject end diff --git a/app/models/user.rb b/app/models/user.rb index 00258c5f5..11baa88dd 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -36,7 +36,7 @@ class User field :profile_url, :type => String # URI of user profile, if any field :status, :type => String, :default => 'active' - field :role, :type => String, :default => 'user' # user, admin, team + field :role, :type => String, :default => 'user' # user, admin, team, bot field :guest, :type => Boolean, :default => false field :tutorial_complete, :type => Boolean, :default => false @@ -221,5 +221,4 @@ def self.group_by_hour(match={}) h end end - end diff --git a/app/models/workflow.rb b/app/models/workflow.rb index 7a380337a..bc0d92209 100644 --- a/app/models/workflow.rb +++ b/app/models/workflow.rb @@ -6,7 +6,7 @@ class Workflow field :key, type: String field :label, type: String field :first_task, type: String - field :retire_limit, type: Integer, default: 3 + field :retire_limit, type: Float, default: 0.75 field :subject_fetch_limit, type: Integer, default: 10 field :generates_subjects, type: Boolean, default: true field :generates_subjects_after, type: Integer, default: 0 diff --git a/app/models/workflow_task.rb b/app/models/workflow_task.rb index 81a743d3e..7d10d6bab 100644 --- a/app/models/workflow_task.rb +++ b/app/models/workflow_task.rb @@ -10,6 +10,7 @@ class WorkflowTask field :next_task, type: String field :help, type: Hash field :examples, type: Array + field :export_name, type: String embedded_in :workflow diff --git a/app/serializers/export/document_field_serializer.rb b/app/serializers/export/document_field_serializer.rb new file mode 100644 index 000000000..f1d572ee4 --- /dev/null +++ b/app/serializers/export/document_field_serializer.rb @@ -0,0 +1,8 @@ +class Export::DocumentFieldSerializer < ActiveModel::MongoidSerializer + attributes :name, :value, :original_value, :assertion_ids + + def assertion_ids + 
object.assertion_ids.map { |oid| oid.to_s } unless object.assertion_ids.blank? + end + +end diff --git a/app/serializers/export/document_serializer.rb b/app/serializers/export/document_serializer.rb new file mode 100644 index 000000000..b6824484b --- /dev/null +++ b/app/serializers/export/document_serializer.rb @@ -0,0 +1,5 @@ +class Export::DocumentSerializer < ActiveModel::MongoidSerializer + attributes :name + + has_many :export_fields +end diff --git a/app/serializers/export/spec/document_field_serializer.rb b/app/serializers/export/spec/document_field_serializer.rb new file mode 100644 index 000000000..30cf4db12 --- /dev/null +++ b/app/serializers/export/spec/document_field_serializer.rb @@ -0,0 +1,7 @@ +class Export::Spec::DocumentFieldSerializer < ActiveModel::MongoidSerializer + attributes :format, :name + + def format + object.sub_fields.blank? && object.format.nil? ? 'string' : object.format + end +end diff --git a/app/serializers/export/spec/document_serializer.rb b/app/serializers/export/spec/document_serializer.rb new file mode 100644 index 000000000..0156aac54 --- /dev/null +++ b/app/serializers/export/spec/document_serializer.rb @@ -0,0 +1,5 @@ +class Export::Spec::DocumentSerializer < ActiveModel::MongoidSerializer + + has_many :spec_fields + +end diff --git a/app/serializers/final_data_export_serializer.rb b/app/serializers/final_data_export_serializer.rb new file mode 100644 index 000000000..f26c2a8c1 --- /dev/null +++ b/app/serializers/final_data_export_serializer.rb @@ -0,0 +1,3 @@ +class FinalDataExportSerializer < ActiveModel::MongoidSerializer + attributes :created_at, :num_final_subject_sets +end diff --git a/app/serializers/final_data_serializer.rb b/app/serializers/final_data_serializer.rb deleted file mode 100644 index e9256d0b1..000000000 --- a/app/serializers/final_data_serializer.rb +++ /dev/null @@ -1,19 +0,0 @@ -class FinalDataSerializer < ActiveModel::MongoidSerializer - attributes :data, :links, :meta - - root false - - def data - 
options = serialization_options.merge({root: false}) - object.map { |s| FinalDataSubjectSetSerializer.new(s, root: false) } - end - - def meta - { - } - end - - def links - {} - end -end diff --git a/app/serializers/final_data_subject_serializer.rb b/app/serializers/final_data_subject_serializer.rb deleted file mode 100644 index a2573cfaf..000000000 --- a/app/serializers/final_data_subject_serializer.rb +++ /dev/null @@ -1,85 +0,0 @@ -class FinalDataSubjectSerializer < ActiveModel::MongoidSerializer - - attributes :id, :type, :location, :region, :width, :height, :meta_data - attributes :data # , :task - attributes :classification_count - attributes :generated_in_workflow - attributes :child_subjects - attributes :transcription_classifications - - def attributes - data = super - - # For brevity, remove attributes that are redundant or always null: - - if data[:type] == 'root' - # Root subjects don't have data: - data.delete :data - data.delete :generated_in_workflow - - else - # All of these are inherited from parent subject, so remove: - data.delete :location - data.delete :width - data.delete :height - data.delete :meta_data - end - - if data[:generated_in_workflow] == 'mark' - # Mark subjects have roughly same info in :data and :region so keep :region - data.delete :data - else - # .. For all other child subjects, delete :region since it's avail in parent - data.delete :region - end - data.delete :transcription_classifications if data[:transcription_classifications].empty? - data.delete :child_subjects if data[:child_subjects].empty? - - data - end - - def generated_in_workflow - return nil if object.parent_subject.nil? - puts "parent subj: #{object}" - object.parent_subject.classifications.first.workflow.name - end - - def child_subjects - object.child_subjects.map { |s| FinalDataSubjectSerializer.new(s, root: false) } - end - - def task - return nil if object.parent_workflow_task.nil? 
- - task = object.parent_workflow_task - { - instruction: task.instruction, - help: task.help, - tool: task.tool, - tool_config: task.tool_config - } - end - - def classification_count - object.classifications.count - end - - def id - object._id.to_s - end - - def include_data? - ! object.data.nil? - end - - def include_task? - ! object.parent_workflow_task.nil? - end - - def transcription_classifications - transcribe_workflow_id = Workflow.where(name:"transcribe").to_a[0]._id - transcription_classifications = object.classifications.where( {workflow_id: transcribe_workflow_id} ).to_a - object.classifications.where( {workflow_id: transcribe_workflow_id} ).map{ |c| FinalClassificationSerializer.new(c, root: false) } - end - -end diff --git a/app/serializers/final_data_subject_set_serializer.rb b/app/serializers/final_data_subject_set_serializer.rb deleted file mode 100644 index ebc049243..000000000 --- a/app/serializers/final_data_subject_set_serializer.rb +++ /dev/null @@ -1,17 +0,0 @@ -class FinalDataSubjectSetSerializer < ActiveModel::MongoidSerializer - - attributes :id - attributes :name - attributes :meta_data - attributes :classification_count - attributes :subjects - - def subjects - object.subjects.root.map { |s| FinalDataSubjectSerializer.new(s, root: false) } - end - - def id - object._id.to_s - end - -end diff --git a/app/serializers/final_subject_assertion_serializer.rb b/app/serializers/final_subject_assertion_serializer.rb new file mode 100644 index 000000000..0e2231096 --- /dev/null +++ b/app/serializers/final_subject_assertion_serializer.rb @@ -0,0 +1,19 @@ +class FinalSubjectAssertionSerializer < ActiveModel::MongoidSerializer + + attributes :id, :status + attributes :name + attributes :created_in_workflow + attributes :confidence + attributes :data + attributes :versions + attributes :region + attributes :task_key + attributes :instructions + + root false + + def id + object.id.to_s + end + +end diff --git 
a/app/serializers/final_subject_serializer.rb b/app/serializers/final_subject_serializer.rb new file mode 100644 index 000000000..95f13ca47 --- /dev/null +++ b/app/serializers/final_subject_serializer.rb @@ -0,0 +1,11 @@ +class FinalSubjectSerializer < ActiveModel::MongoidSerializer + + attributes :id, :type, :location, :status, :width, :height, :meta_data + has_many :assertions + + # scope :by_keyword, -> (keyword) { where(: keyword) } + + def id + object.id.to_s + end +end diff --git a/app/serializers/final_subject_set_serializer.rb b/app/serializers/final_subject_set_serializer.rb new file mode 100644 index 000000000..8aa84a73e --- /dev/null +++ b/app/serializers/final_subject_set_serializer.rb @@ -0,0 +1,15 @@ +class FinalSubjectSetSerializer < ActiveModel::MongoidSerializer + + attributes :id, :meta_data, :type, :search_terms_by_field + + has_many :subjects + has_one :export_document + + def id + object.id.to_s + end + + def type + 'final_subject_set' + end +end diff --git a/app/serializers/generic_result_serializer.rb b/app/serializers/generic_result_serializer.rb new file mode 100644 index 000000000..f945e2f7d --- /dev/null +++ b/app/serializers/generic_result_serializer.rb @@ -0,0 +1,52 @@ +# Generic serializer for arrays of objects of arbitrary types +# Produces JSONAPI style results with pagination meta +class GenericResultSerializer < ActiveModel::MongoidSerializer + attributes :data, :links, :meta + + root false + + # This serializes both single objects and arrays of objects, so data should output either a hash or an array respectively: + def data + options = serialization_options.merge({root: false, scope: scope}) + + # Array of results? + if object.respond_to? :each + return [] if object.empty? 
+ + # Determine what serializer to use based on class of first item: + klass = object.first.class.to_s + serializer_class = eval("#{klass}Serializer") + object.map { |s| serializer_class.new(s, options) } + + else + # Determine what serializer to use based on class of first item: + klass = object.class.to_s + serializer_class = eval("#{klass}Serializer") + serializer_class.new(object, options) + end + end + + def meta + m = { + current_page: object.current_page, + next_page: object.next_page, + prev_page: object.prev_page, + total_pages: object.total_pages, + total: object.count + } if object.respond_to? :current_page + m = m.merge(serialization_options[:meta]) if ! serialization_options[:meta].nil? + m + end + + def links + m = {} + if serialization_options[:base_url] + base_url,query = serialization_options[:base_url].split '?' + query = Rack::Utils.parse_nested_query query + m[:next_page_uri] = "#{base_url}?#{query.merge({"page" => object.next_page}).to_query}" if object.next_page + m[:prev_page_uri] = "#{base_url}?#{query.merge({"page" => object.prev_page}).to_query}" if object.prev_page + end + m.merge! 
serialization_options[:links] if serialization_options[:links] + m + end +end diff --git a/app/serializers/project_serializer.rb b/app/serializers/project_serializer.rb index b12ec0023..a0319e603 100644 --- a/app/serializers/project_serializer.rb +++ b/app/serializers/project_serializer.rb @@ -1,20 +1,26 @@ class ProjectSerializer < ActiveModel::MongoidSerializer - attributes :id, :title, :short_title, :summary, :home_page_content, :organizations , :team, :pages, :menus, :partials, :logo, :background, :workflows, :forum, :tutorial, :feedback_form_url, :metadata_search, :terms_map, :blog_url, :discuss_url, :privacy_policy + attributes :id, :title, :short_title, :summary, :home_page_content, :organizations , :team, :pages, :page_navs, :menus, :partials, :logo, :background, :workflows, :forum, :tutorial, :feedback_form_url, :metadata_search, :terms_map, :blog_url, :discuss_url, :privacy_policy, :downloadable_data, :latest_export + attributes :classification_count, :root_subjects_count has_many :workflows - # delegate :current_or_guest_user, to: :scope + has_many :export_document_specs - def id - object._id.to_s + def latest_export + FinalDataExportSerializer.new FinalDataExport.most_recent.first, root: false + end + + def classification_count + # TODO: This should be scoped to project, but Classification#project_id doesn't exist + Classification.count end -=begin - def current_user_tutorial - user = scope.nil? ? nil : current_or_guest_user - unless user == nil - user.tutorial_complete - end + def root_subjects_count + # TODO: This too should be scoped to project, but Subject#project_id doesn't exist... + Subject.root.count + end + + def id + object._id.to_s end -=end end diff --git a/app/views/admin/dashboard/index.html.erb b/app/views/admin/dashboard/index.html.erb index 0b434ca42..5944a6499 100644 --- a/app/views/admin/dashboard/index.html.erb +++ b/app/views/admin/dashboard/index.html.erb @@ -106,7 +106,7 @@
-

Verify

+

Transcriptions Being Verified

0 Total @@ -117,11 +117,13 @@
Active:
-
Verify subjects actively being voted upon
+
Transcriptions actively being voted upon
Inactive:
-
Verify subjects waiting for one or more transcriptions before being activated
+
Transcriptions awaiting one or more additional transcriptions before being voted upon
Complete:
-
Verify subjects that have received sufficient votes to choose one best transcription
+
Transcriptions that skipped voting because transcriptions were identical
+
Retired:
+
Transcriptions taken out of voting because voting has ended
diff --git a/app/views/admin/data/index.html.erb b/app/views/admin/data/index.html.erb index 119376c19..bddc8654e 100644 --- a/app/views/admin/data/index.html.erb +++ b/app/views/admin/data/index.html.erb @@ -1,36 +1,32 @@

Data

-
- -

<%= @num_complete %> complete subject(s) ready for export (<%= @num_non_root %> pending).

- -

Format

-<% { "JSON" => 'json', - "CSV" => 'csv' - }.each do |(label, key)| - input_id = "download_format_#{key}" -%> -
- /> - -
-<% end %> +<% if @export.nil? %> +

No data exports have yet been built.

+

Please run `rake project:build_and_export_final_data` first

-

Completeness

-<% { "Complete (Only crowd-verified subjects)" => 'complete', - "All (All data in a massive json struc)" => 'all' - }.each do |(label, key)| - input_id = "download_status_#{key}" -%> -
- /> - -
-<% end %> +<% else %> +

Most recent data export: + +

+
Items
+
<%= @export.num_final_subject_sets %>
+ +
Built
+
<%= @export.updated_at.strftime('%B %-d, %Y') %> +
-

Download

+
Download - +

Make Public?

-
+

Should the public be able to download the latest from /data/latest and subscribe to the data updates ATOM feed?

+ +
+ <%= check_box 'project', "downloadable_data" %> + <%= label 'project','downloadable_data', 'Allow the public to download data' %> + +

+
+ +<% end %> diff --git a/app/views/final_data_exports/index.atom.builder b/app/views/final_data_exports/index.atom.builder new file mode 100644 index 000000000..2b22ca158 --- /dev/null +++ b/app/views/final_data_exports/index.atom.builder @@ -0,0 +1,11 @@ +atom_feed do |feed| + + feed.title("#{Project.current.title} Data Exports") + feed.updated(@exports[0].created_at) if @exports.length > 0 + + @exports.each do |export| + feed.entry(export) do |entry| + entry.title("#{export.updated_at.strftime('%c')}: #{export.num_final_subject_sets} subjects") + end + end +end diff --git a/config/initializers/register_project_static_routes.rb b/config/initializers/register_project_static_routes.rb index 05cb4a14b..9c61b0ad3 100644 --- a/config/initializers/register_project_static_routes.rb +++ b/config/initializers/register_project_static_routes.rb @@ -1,7 +1,11 @@ API::Application.configure do - if Project.current - project_assets_path = "./project/#{Project.current.key}/assets" - puts "Routing static assets from #{project_assets_path}" - Rails.application.config.middleware.insert_after ActionDispatch::Static, ActionDispatch::Static, project_assets_path + begin + if Project.current + project_assets_path = "./project/#{Project.current.key}/assets" + puts "Routing static assets from #{project_assets_path}" + Rails.application.config.middleware.insert_after ActionDispatch::Static, ActionDispatch::Static, project_assets_path + end + rescue + puts "FAILED to register static routing" end end diff --git a/config/routes.rb b/config/routes.rb index b6b9f7781..eb8162aae 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -8,6 +8,7 @@ get '/projects', to: 'projects#index', defaults: { format: 'json' } + get '/projects/current', to: 'projects#current', defaults: { format: 'json' } get '/workflows', to: 'workflow#index', defaults: { format: 'json' } get '/workflows/:id', to: 'workflow#show', defaults: { format: 'json' } @@ -35,10 +36,16 @@ resources :groups, only: [:show, :index], 
:defaults => { :format => 'json' } + # Final data: + resources :final_subject_sets, only: [:show, :index], :defaults => { :format => 'json' } + get '/data/latest', to: 'final_data_exports#latest' + resources :final_data_exports, only: [:show, :index], path: "/data" + namespace :admin do resources :subject_sets, :subjects, :classifications, :users get 'dashboard' => 'dashboard#index' get 'data' => 'data#index' + post 'data' => 'data#index' get 'data/download' => 'data#download' get 'signin' => 'auth#signin' post 'stats/recalculate' => 'dashboard#recalculate_stats' diff --git a/lib/tasks/bot.rake b/lib/tasks/bot.rake new file mode 100644 index 000000000..e52cb053e --- /dev/null +++ b/lib/tasks/bot.rake @@ -0,0 +1,46 @@ +namespace :bot do + + desc "Create Bot with name, printing out token to use in HTTP_BOT_AUTH" + task :create, [:name] => :environment do |task, args| + args.with_defaults name: 'ScribeBot' + + ret = BotUser.create args[:name] + + if ! ret[:token].blank? + puts "Created #{ret[:user].name}. Use HTTP header to authenticate:" + puts " #{BotUser::AUTH_HEADER}=#{BotUser::pack_auth_header(ret[:user].id, ret[:token])}" + else + puts "#{ret[:user].name} already exists, so token can not be read but may be reset. Use bot:reset to reset token." + end + end + + desc "Reset Bot token with name, printing out token to use in HTTP_ROBOT_AUTH" + task :reset, [:name] => :environment do |task, args| + args.with_defaults name: 'ScribeBot' + + user = BotUser.find_by name: args[:name] + token = user.reset_token! + + if token + puts "Reset #{user.name}. Use HTTP header to authenticate:" + puts " #{BotUser::AUTH_HEADER}=#{BotUser::pack_auth_header(user.id, token)}" + end + end + + desc "Delete Bot by name" + task :delete, [:name] => :environment do |task, args| + if args[:name].blank? + puts "No name given. Aborting." 
+ exit + end + + user = BotUser.find_by name: args[:name] + if user + user.destroy + puts "Removed #{user.name}" + else + puts "Bot user #{args[:name]} could not be found" + end + end + +end diff --git a/lib/tasks/project.rake b/lib/tasks/project.rake index 6bf1f6ed7..faeb4898a 100644 --- a/lib/tasks/project.rake +++ b/lib/tasks/project.rake @@ -126,70 +126,34 @@ namespace :project do # load project_file_path project = Project.find_or_create_by key: project_key - # Establish some defaults so that if they're not set in the project hash, we overwrite the old value with the null default - project_defaults = { - background: nil, - logo: nil, - favicon: nil, - terms_map: {}, - team_emails: [], - team: [], - organizations: [], - analytics: nil, - forum: nil, - menus: {}, - partials: {} - } + load_export_specs(project, project_hash['export_specs']) if project_hash['export_specs'] + # Set all valid fields from hash: - project_hash = project_hash.inject(project_defaults) { |h, (k,v)| h[k] = v if Project.fields.keys.include?(k.to_s); h } + project_hash = project_hash.inject({}) { |h, (k,v)| h[k] = v if Project.fields.keys.include?(k.to_s); h } project.update project_hash - puts "Created project: #{project.title}" - # Load pages from content/*: content_path = Rails.root.join('project', project_key, 'content') puts "Loading pages from #{content_path}:" - prev_pages = project.pages project.pages = [] - Dir.foreach(content_path).each do |file| - path = Rails.root.join content_path, file - next if File.directory? path - next if ! ['.html','.erb','.md'].include? path.extname - ext = path.extname - page_key = file.split('.').first - name = page_key.capitalize - content = File.read path - - puts " Loading page: \"#{name}\" (#{content.size}b)" - if page_key == 'home' - project.home_page_content = content - + # Dir.foreach(content_path).each do |file| + # path = Rails.root.join content_path, file + # next if File.directory? path + # next if ! ['.html','.erb','.md'].include? 
path.extname + + # Load legacy pages from content folder directly: + Dir.glob("#{content_path}/*.{erb,html,md}").each do |path| + load_page project, path + end + + # Also load anything inside content/pages: + Dir.glob("#{content_path}/pages/*").each do |path| + if File.directory?(path) + load_page_group project, path else - # Set updated at if content changed: - updated_at = Time.now - if ! prev_pages.nil? && ! prev_pages.empty? - previous_page = prev_pages.select { |p| p[:key] == page_key } - if ! previous_page.empty? && (previous_page = previous_page.first) - updated_at = ! previous_page[:updated_at].nil? && previous_page[:content] == content ? previous_page[:updated_at] : Time.now - end - end - - # Check if we should include group browser content - group_match = //.match(content) - group_browser = '' - if group_match && !group_match.captures.empty? - group_browser = group_match.captures[0] - end - - project.pages << { - key: page_key, - name: name, - content: content, - updated_at: updated_at, - group_browser: group_browser - } + load_page project, path end end @@ -216,6 +180,72 @@ namespace :project do project end + def load_page_group(project, path) + base_key = File.basename path + + nav_content = nil + nav_path = File.join(path, "_nav.md") + if File.exist?(nav_path) + nav_content = File.read nav_path + puts "got nav: #{nav_content}" + end + + Dir.glob("#{path}/*.{erb,html,md}").each do |path| + load_page project, path, {base_key: base_key, nav: nav_content} unless File.basename(path).match(/^_/) + end + end + + def load_page(project, path, options = {}) + filename = File.basename path + + page_key = filename.split('.').first + name = page_key.capitalize + name = "#{options[:base_key].capitalize} | #{name}" if options[:base_key] + content = File.read path + + if page_key == 'home' + project.home_page_content = content + + else + # Set updated at if content changed: + updated_at = Time.now + if ! project.pages.nil? && ! project.pages.empty? 
+ previous_page = project.pages.select { |p| p[:key] == page_key } + if ! previous_page.empty? && (previous_page = previous_page.first) + updated_at = ! previous_page[:updated_at].nil? && previous_page[:content] == content ? previous_page[:updated_at] : Time.now + end + end + + # PB 20160219 deprecating this cause doesn't appear in use + # Check if we should include group browser content + # group_match = //.match(content) + # group_browser = '' + # if group_match && !group_match.captures.empty? + # group_browser = group_match.captures[0] + # end + + # Place page nav in special page_navs hash by base key: + project.page_navs = {} if options[:nav] + project.page_navs[options[:base_key]] = options[:nav] if options[:nav] + + project.pages << { + key: ( options[:base_key].nil? ? '' : "#{options[:base_key]}/" ) + page_key, + name: name, + content: content, + updated_at: updated_at + # group_browser: group_browser + } + end + puts " Loaded page: \"#{options[:base_key]}/#{name}\" (#{content.size}b)" + + end + + def load_export_specs(project, config) + project.export_document_specs = config.map do |h| + Export::Spec::Document.from_hash h, project + end + end + def load_styles(project) load_images(project.key) @@ -400,8 +430,148 @@ namespace :project do end + desc "Build final_subject* data in database" + task :build_final_data, [:project_key, :rebuild, :start, :limit] => :environment do |task, args| + args.with_defaults rebuild: true, start: 0, limit: Float::INFINITY + rebuild = args[:rebuild] != 'false' + start = args[:start].to_i + limit = args[:limit].to_f + + project = project_by_key args[:project_key] + + start_time = Time.now + count = project.subject_sets.count + last_index = [count, start + limit - 1].min + step = [100, limit].min + built = 0 + + # puts "set: #{SubjectSet.find("5637a11432623300030a0100").inspect}" + # FinalSubjectSet.assert_for_set SubjectSet.find("56b115677061755afb539701"), rebuild + # FinalSubjectSet.assert_for_set 
FinalSubjectSet.find('56b118e07061755afbfcd801').subject_set, rebuild + # exit + + # Do any of this project's workflow tasks have configured export_names? If not, warn: + has_export_names = ! project.workflows.map { |w| w.tasks }.flatten.select { |t| ! t.export_name.blank? }.empty? + puts "WARNING: No export_names found in workflow configuration. This may make it tricky to interpret the field-level data. See `export_name` documentation in https://github.com/zooniverse/scribeAPI/wiki/Project-Workflows#tasks" if ! has_export_names + + if project.export_document_specs.blank? + puts "No export_spec configured; Add one before building" + exit + end + + # Rebuild indexes + FinalSubjectSet.rebuild_indexes Project.current + + (start..last_index).step(step).each do |offset| + sets = project.subject_sets.offset(offset).limit(step).each_with_index do |set, i| + + final_set = FinalSubjectSet.assert_for_set set, rebuild + built += 1 + + ellapsed = Time.now - start_time + per_set = ellapsed / built + remaining = per_set * (count - (offset + i+1)) / 60 / 60 + complete = (offset + i+1).to_f / count * 100 + $stderr.print "\r#{'%.8f' % complete}% complete. #{'%.1f' % remaining}h remaining. Built item #{offset +i+1} of #{count}" + end + end + + end + + desc "Using data in final_subject* collections, generate a series of JSON exports and attempt to create a downloadable ZIP" + task :export_final_data, [:project_key] => :environment do |task, args| + project = project_by_key args[:project_key] + # Make sure user has run build_final_data first: + if project.final_subject_sets.empty? + puts "No FinalSubjectSets found." + exit + end + + missing_env_keys = ['S3_EXPORT_BUCKET','S3_EXPORT_PATH','AWS_REGION','AWS_ACCESS_KEY_ID','AWS_SECRET_ACCESS_KEY'].select { |k| ENV[k].nil? } + if ! missing_env_keys.empty? 
+ puts "Can not export data without setting #{missing_env_keys.join ", "}" + exit + end + s3client = Aws::S3::Client.new + + local_export_base = "#{Rails.root}/tmp/export/#{project.key}" + + # Remove previous: + # `rm -rf #{local_export_base}` if File.exists?(local_export_base) + + FileUtils.mkdir_p(local_export_base) unless File.exists?(local_export_base) + start = Time.now + built = 0 + limit = 100 + count = FinalSubjectSet.count + + (0..count).step(limit).each do |offset| + project.final_subject_sets.offset(offset).limit(limit).each_with_index do |set, i| + path = "#{local_export_base}/#{set.subject_set_id}.json" + content = FinalSubjectSetSerializer.new(set, root:false).to_json + File.open path, "w" do |f| + f << content + end + built += 1 + + # puts "Wrote #{i+1} of #{count}: #{content.size}b to #{path}" + ellapsed = Time.now - start + per_set = ellapsed / built + remaining = per_set * (count - (offset + i+1)) / 60 + complete = (offset + i+1).to_f / count * 100 + $stderr.print "\r#{'%.8f' % complete}% complete. #{'%.1f' % remaining}m remaining. 
Built #{offset +i+1} of #{count}" + end + end + + # Generate timestamped filename with random suffix so it can't be guessed: + rand_suffix = (('a'..'z').to_a + (0..9).to_a).shuffle[0,16].join + max_updated = project.final_subject_sets.max(:updated_at) + filename = "scribe-#{project.key}-#{max_updated.strftime("%F")}-#{rand_suffix}.tar.gz" + + # Zip it up + Rails.logger.info "Rake Complete, Begin GZIP, Go to S3" + sh %{cd #{local_export_base}; tar cfvz #{filename} --exclude '*.gz' .;} + Rails.logger.info "Tar-ing Complete" + + # Upload it to S3 + s3client = Aws::S3::Client.new + local_path = "#{local_export_base}/#{filename}" + remote_path = "#{ENV['S3_EXPORT_PATH']}/#{filename}" + + Rails.logger.info "Uploading #{local_path} to #{ENV['S3_EXPORT_BUCKET']}#{remote_path}" + s3client.put_object({ + acl: 'public-read', + bucket: ENV['S3_EXPORT_BUCKET'], + key: remote_path, + body: File.read(local_path) + }) + + # Remove local temp files + sh %{rm -rf #{local_export_base};} + + # Create the final-data-export record so it appears on /#/data/exports + s3_url = "http://#{ENV['S3_EXPORT_BUCKET']}/#{remote_path}" + FinalDataExport.create path: s3_url, num_final_subject_sets: count, project: project + + puts "Finished building exports. Download at: #{s3_url}" + + end + + desc "Convenience method that, in one call, builds all data JSONs and zips them up into a single ZIP release" + task :build_and_export_final_data, [:project_key, :rebuild, :ensure_day_of_week_is] => :environment do |task, args| + # If ensure_day_of_week_is given, proceed with execution only if weekday matches value + # (Important for heroku scheduler, which can schedule daily but not weekly) + if ! args[:ensure_day_of_week_is].blank? 
+ if Date.today.strftime("%A").downcase != args[:ensure_day_of_week_is].downcase + puts "Aborting because today is not #{args[:ensure_day_of_week_is]}" + exit + end + end + Rake::Task['project:build_final_data'].invoke(args[:project_key], args[:rebuild]) + Rake::Task['project:export_final_data'].invoke(args[:project_key]) + end def translate_pick_one_tool_config(task_hash) config = task_hash[:tool_config] || {} @@ -487,5 +657,10 @@ namespace :project do end end + def project_by_key(key, default=Project.current) + p = Project.find_by key: key + p = default if ! p + p + end end diff --git a/package.json b/package.json index e08165df2..060c3cebf 100644 --- a/package.json +++ b/package.json @@ -15,6 +15,7 @@ "coffee-reactify": "^4.0.0", "coffee-script": "^1.9.3", "json-api-client": "^0.4.4", + "marked": "^0.3.5", "normalize-styl": "^3.0.3", "normalize.css": "^3.0.3", "react": "^0.13.3", diff --git a/project/emigrant/assets/css/styles.css b/project/emigrant/assets/css/styles.css index 44c2582c3..597f47159 100644 --- a/project/emigrant/assets/css/styles.css +++ b/project/emigrant/assets/css/styles.css @@ -98,6 +98,7 @@ html, body, width: 960px; margin: 20px auto; box-sizing: border-box; + padding: 0px; } @media screen and (max-width: 999px) { .page-content { @@ -111,7 +112,7 @@ html, body, background: rgba(255,255,255,0.85); padding: 20px 40px; } -.page-content.custom-page > div:nth-child(2) > *:first-child { +.page-content.custom-page h2 { color: #fff; background: #3f5765; margin-top: -20px; @@ -120,7 +121,21 @@ html, body, padding: 40px 0; border-top-left-radius: 6px; border-top-right-radius: 6px; + text-align: center; +} + +.page-content.custom-page .with-nav { + padding-left: 20px; } + +.page-content.custom-page .with-nav h2 { + background: transparent; + color: #3f5765; + text-align: left; + margin: 0; + padding: 10px 0; +} + .page-content > div.updated-at { border-top-left-radius: 0; border-top-right-radius: 0; @@ -137,7 +152,6 @@ html, body, .page-content h2 { 
font-size: 36px; font-weight: 400; - text-align: center; } .page-content h3 { font-size: 28px; diff --git a/project/emigrant/assets/images/pageuri.png b/project/emigrant/assets/images/pageuri.png new file mode 100644 index 000000000..fa75f2334 Binary files /dev/null and b/project/emigrant/assets/images/pageuri.png differ diff --git a/project/emigrant/assets/images/viewrecorddc.mp4 b/project/emigrant/assets/images/viewrecorddc.mp4 new file mode 100644 index 000000000..bb252a545 Binary files /dev/null and b/project/emigrant/assets/images/viewrecorddc.mp4 differ diff --git a/project/emigrant/bot-example.rb b/project/emigrant/bot-example.rb new file mode 100644 index 000000000..dcdd14512 --- /dev/null +++ b/project/emigrant/bot-example.rb @@ -0,0 +1,151 @@ + +require 'open-uri' +require 'json' +require 'cgi' + +# Useful extension to Hash to create query strings: +class Hash + def to_params + params = '' + stack = [] + + each do |k, v| + if v.is_a?(Hash) + stack << [k,v] + elsif v.is_a?(Array) + stack << [k,Hash.from_array(v)] + else + params << "#{k}=#{v}&" + end + end + + stack.each do |parent, hash| + hash.each do |k, v| + if v.is_a?(Hash) + stack << ["#{parent}[#{k}]", v] + else + params << "#{parent}[#{k}]=#{v}&" + end + end + end + + params.chop! 
+ params + end + + def self.from_array(array = []) + h = Hash.new + array.size.times do |t| + h[t] = array[t] + end + h + end + +end + +# Example Scribe bot class: +class ScribeBot + + def initialize(scribe_endpoint) + @classifications_endpoint = scribe_endpoint + end + + # Post classification for a known subject_id + def classify_subject_by_id(subject_id, workflow_name, task_key, data) + params = { + workflow: { + name: workflow_name + }, + classifications: { + annotation: data, + task_key: task_key, + subject_id: subject_id + } + } + + submit_classification params + end + + # Post classification for subject specified by URL: + def classify_subject_by_url(subject_url, workflow_name, task_key, data) + params = { + subject: { + location: { + standard: CGI::escape(subject_url) + } + }, + workflow: { + name: workflow_name + }, + classifications: { + annotation: data, + task_key: task_key + } + } + + submit_classification params + end + + # Posts params as-is to classifications endpoint: + def submit_classification(params) + + require 'uri' + require "net/http" + + uri = URI(@classifications_endpoint) + + req = Net::HTTP::Post.new(uri.path, {'BOT_AUTH' => ENV['SCRIBE_BOT_TOKEN']}) + req.body = params.to_params + http = Net::HTTP.new(uri.host, uri.port) + + response = http.start {|http| http.request(req) } + + begin + JSON.parse response.body + rescue + nil + end + end +end + +# This simple script demonstrates use of the Scribe Classifications endpoint to generate data +# +# Useage: +# ruby bot-example.rb [-scribe-endpoint="http://localhost:3000"] +# + +options = Hash[ ARGV.join(' ').scan(/--?([^=\s]+)(?:=(\S+))?/) ] +options["scribe-endpoint"] = "http://localhost:3000/classifications" if ! options["scribe-endpoint"] + +args = ARGV.select { |a| ! 
a.match /^-/ } + +bot = ScribeBot.new options["scribe-endpoint"] + +# The following generates generates two classfiications: One mark classification +# and one transcription classification (applied to the subject generated by the +# mark classification). + +# Specify subject by standard URL (since this is a bot classification, it will be created automatically if it doesn't exist) +image_uri = "https://s3.amazonaws.com/scribe.nypl.org/emigrant-s4/full/619aed10-23fd-0133-16de-58d385a7bbd0.right-bottom.jpg" + +# Must manually specify workflow name ('mark'), and task_key ('mark_primary') +classification = bot.classify_subject_by_url( image_uri, "mark", "mark_primary", { + x: 100, + y: 200, + width: 300, + height: 200, + subToolIndex: 0 # Must specify subToolIndex (integer index into the tools array configured for workflow task) +})['classification'] + +# Response should contain a classification with a nested child_subject: +puts "Created classification: #{classification.to_json}" + +# Assuming above was successful, use the returned, generated subject_id to create next classification: +mark_id = classification['child_subject']['id'] +# Subjects generated in Mark tend to have `type`s that correspond to Transcribe task keys: +transcribe_task_key = classification['child_subject']['type'] +# Create transcription classification: +classification = bot.classify_subject_by_id( mark_id, "transcribe", transcribe_task_key, { value: 'foo' }) + +# Response should contain a classification with a nested verify subject (or orphaned subject if there is no Verify workflow) +puts "Created transcription classification: #{classification.to_json}" diff --git a/project/emigrant/content/about.html.erb b/project/emigrant/content/about.html.erb index cfc8ca848..de3e4191f 100644 --- a/project/emigrant/content/about.html.erb +++ b/project/emigrant/content/about.html.erb @@ -3,6 +3,9 @@ ### Contact information Questions? Comments? 
Contact us at emigrantcity@nypl.org or reach out to us on [Twitter](https://twitter.com/nypl_labs). +### Data +This is an active project and we’re continuing to gather data from the records. Every two weeks, we build a merged, anonymized dump of that data. You can browse or download the entire data set on the Data page. + ### About Emigrant Bank [Emigrant Bank](https://www.emigrant.com/Information/aboutus/AboutUs.jsp) was founded in 1850 by members of the Irish Emigrant society to serve the needs of the Irish immigrant community in New York. In its early history, the bank grew to become the seventh largest bank in the nation, and it made major investments in the growth of New York City by underwriting loans for such important initiatives as the construction of St. Patrickís Cathedral and a public works project that ultimately became Central Park. In 1995, Emigrant Bank generously donated to The New York Public Library [extensive archival records](http://archives.nypl.org/mss/925) that are valuable historical and genealogical resources documenting the lives of immigrant families. The Library microfilmed the entire collection and compiled a [detailed finding aid](http://archives.nypl.org/uploads/documents/documentation/collection_1837_mss925-extra.pdf) that outlines the full scope of the Emigrant Savings Bank records. This heavily trafficked collection is housed in the [Manuscripts and Archives Division](http://www.nypl.org/locations/divisions/manuscripts-division). Though, users primarily encounter it through the [Irma and Paul Milstein Division of United States History, Local History and Genealogy](http://www.nypl.org/locations/divisions/milstein). 
diff --git a/project/emigrant/content/data.html.erb b/project/emigrant/content/pages/data.md similarity index 77% rename from project/emigrant/content/data.html.erb rename to project/emigrant/content/pages/data.md index d711efd30..b0d4a9733 100644 --- a/project/emigrant/content/data.html.erb +++ b/project/emigrant/content/pages/data.md @@ -1,6 +1,6 @@ ## Data exports -With help from volunteers like yourself, we are extracting structured, building-level information from about 6,400 mortgages contained in the Emigrant Savings Bank Records at The New York Public Library. Of course, the ultimate goal is to make this data publicly available. Having a keyword-searchable and structured index of names and mortgage details will of great use to genealogists, historians, digital humanities researchers, and others interested in exploring historical data sources. +Participants have made {{project.classification_count}} contributions to Emigrant City to date. This project periodically builds a merged, anonymized dump of that data, which is made public here. This is an active project and we’re continuing to gather data from the records. (You can [help](/#/intro)!) The data made available here is refreshed weekly. After enough transcriptions are made, our team will determine the best way to get the data to you in an easy and accessible way. In the meantime, feel free to explore the collection assets themselves on the Library's [Digital Collections](http://digitalcollections.nypl.org/collections/emigrant-savings-bank-records) website. In this project, we're working with the subset of these digitized materials, the volumes containing Mortgage and Bond records. The volumes appearing in Emigrant City are: * [Bond and Mortgage Record Book 1 (1 to 1,555)](http://digitalcollections.nypl.org/items/df712aa0-00b1-0133-fbd7-58d385a7bbd0) @@ -15,6 +15,6 @@ After enough transcriptions are made, our team will determine the best way to ge * [Real Estate Loans No. 
13](http://digitalcollections.nypl.org/items/3edf3050-24cd-0133-e6df-58d385a7b928) -You may also be interested to explore how we’ve been experimenting with opening up data from other crowdsourcing projects: +You may also be interested to explore how we’ve been experimenting with opening up data from other participatory projects: * [What's on the Menu?](http://menus.nypl.org/data) * [Building Inspector](http://buildinginspector.nypl.org/data) diff --git a/project/emigrant/content/pages/data/_nav.md b/project/emigrant/content/pages/data/_nav.md new file mode 100644 index 000000000..e8b747d38 --- /dev/null +++ b/project/emigrant/content/pages/data/_nav.md @@ -0,0 +1,4 @@ + * [About](/#/data) + * [Browse](/#/data/browse) + * [Download](/#/data/download) + * [Tips & Tricks](/#/data/tips) diff --git a/project/emigrant/content/pages/data/download.md b/project/emigrant/content/pages/data/download.md new file mode 100644 index 000000000..8ee57ac6c --- /dev/null +++ b/project/emigrant/content/pages/data/download.md @@ -0,0 +1,11 @@ +## Download + +Participants have made {{project.classification_count}} contributions to {{project.title}} to date. This project periodically builds a merged, anonymized dump of that data, which is made public here. + +This is a large dataset in JSON format containing all the assertions, confidence ratings, and fields for the {{project.root_subjects_count}} records in {{project.title}}. We are actively gathering contributions to the data set and it is refreshed weekly. The last dump was {{project.latest_export.created_at}}. + +Download Latest Raw Data + +For help interpreting the data, see Scribe WIKI on Data Exports. 
+ +To browse past releases and/or to be notified when new releases are made, you may wish to subscribe to the ATOM Feed of Data Releases diff --git a/project/emigrant/content/pages/data/tips.md b/project/emigrant/content/pages/data/tips.md new file mode 100644 index 000000000..22b42e93f --- /dev/null +++ b/project/emigrant/content/pages/data/tips.md @@ -0,0 +1,48 @@ +## Tips & Tricks + +### Search +* **Use quotation marks to search for full phrases.** +[Search results](http://emigrantcity.nypl.org/?#/data/browse?keyword=margaret%20armstrong) for Margaret Armstrong will display records with Margaret and/or Armstrong in the record. However, [search results](http://emigrantcity.nypl.org/?#/data/browse?keyword=%22margaret%20armstrong%22) for "Margaret Armstrong" display only records where the full name "Margaret Armstrong" appears. + +* **Capitalization does not matter in search.** +"MARGARET," "margaret," and "Margaret" entered into the search box will yield the same results. + +* **Consider abbreviations.** +We asked users to transcribe exactly what they see written on the records including abbreviations. The word Brooklyn may appear as “BKLYN” in one record and “Brooklyn” in another. + +### Reading a Record +The data in these records have been communally created through the Emigrant City participatory project. See the Intro for more information on the steps for publicly and collaboratively creating this resource. As you'll see below, we've tried to be transparent about the collaborative nature of this resource with notations about confidence and status. + +* **Best Data versus All Data** +A record's best data consolidates and lightly cleans all the fields for that record. During the transcription process, the same field may have been marked multiple times and made it all the way through the Scribe work-flow resulting in duplications of that unique field. Best data consolidates these duplications. There is also minimal formatting cleanup. 
For instance, in Best Data, Amount Loaned is represented as a dollar amount rather than just a number. + +* **What is the Source Metadata?** +This page contains technical details related to the transcription interface and includes a link to the high res version of the record page. + +* **What is the confidence field?** +Record fields were created through contributions from many users. As a result, we can gauge how confident we are about each field's accuracy. Fields with a 100% confidence rating are fields for which every transcription was the same. Lower confidence ratings mean that there was disagreement on how best to transcribe a field. + +* **What is a field's status?** +The status of each field is displayed. This corresponds with where the field's transcription is in the work-flow of the project, be it Mark, Transcribe, or Verify. + +* **What are distinct transcriptions?** +Each field is annotated with the number of distinct transcriptions it has received during the project's run. A distinct transcription of 1 means everyone transcribed the same thing for that field. + +* **High res images of the records** +Emigrant City is a project and resource to browse historic mortgage records from the Emigrant Savings Bank. You may view and/or download high resolution images of these records on our Digital Collections site. If you would like to view the high resolution version of a record, navigate to the Source Metadata tab and click on the Page Uri Link. A high resolution image of the record's full page will open in a new tab. + + + + + +* **Which fields appear?** +This project is intended to create an index to enable further discovery. With this project, the goal is to create an index of the bond and mortgage records to enable further discovery and use. 
We’ve worked with experts on the material to scope the data collected to fields which appear with greater regularity across documents: Record Date, Record Number, Mortgager Name, Street Address, Amount Loaned, Valuation, and Dimension & Description. Collecting and verifying these fields from the records creates a resource that opens up the search of these materials. However, the data being gathered is not exhaustive. Rather than creating a complete transcription of each record, the transcribed fields create an index to give some toeholds for future reference and querying of these rich materials which previously were largely invisible and difficult to search. + +* **How often is the data refreshed?** +Contributions are still actively being made to Emigrant City. The records available in browse and download will be refreshed weekly with these new contributions. (You can still contribute to the project by transcribing and verifying data.) + + + diff --git a/project/emigrant/content/pages/data_new.md b/project/emigrant/content/pages/data_new.md new file mode 100644 index 000000000..d7320981f --- /dev/null +++ b/project/emigrant/content/pages/data_new.md @@ -0,0 +1,21 @@ +## Data exports + +Participants have made {{project.classification_count}} contributions to Emigrant City to date. This project periodically builds a merged, anonymized dump of that data, which is made public here. This is an active project and we’re continuing to gather data from the records. The data made available here is refreshed weekly. + +## Source Assets +Feel free to explore the collection assets themselves on the Library's [Digital Collections](http://digitalcollections.nypl.org/collections/emigrant-savings-bank-records) website. In this project, we're working with the subset of these digitized materials, the volumes containing Mortgage and Bond records. 
The volumes appearing in Emigrant City are: +* [Bond and Mortgage Record Book 1 (1 to 1,555)](http://digitalcollections.nypl.org/items/df712aa0-00b1-0133-fbd7-58d385a7bbd0) +* [Bond and Mortgage Record Book 2 (1,556 to 2, 721)](http://digitalcollections.nypl.org/items/c0c38370-015a-0133-065e-58d385a7bbd0) +* [Bond and Mortgage Record Book 3 (2,722 to 3,699)](http://digitalcollections.nypl.org/items/5bb969d0-0241-0133-f196-58d385a7b928) +* [Bond and Mortgage Record Book 4 (3,700 to 4,499)](http://digitalcollections.nypl.org/items/109c0900-02e7-0133-03cf-58d385a7bbd0) +* [Bond and Mortgage Record Book 5 (4,500 to 5,499)](http://digitalcollections.nypl.org/items/e53b4fe0-02fc-0133-0e0d-58d385a7bbd0) +* [Bond and Mortgage Record Book 6 (5,500 to 6,403)](http://digitalcollections.nypl.org/items/20aa00a0-0311-0133-9d30-58d385a7bbd0) +* [Real Estate Loans No. 9](http://digitalcollections.nypl.org/items/6cf0ed60-23ef-0133-6b54-58d385a7b928) +* [Real Estate Loans No. 10](http://digitalcollections.nypl.org/items/59b0a100-23fd-0133-b24f-58d385a7bbd0) +* [Real Estate Loans No. 11](http://digitalcollections.nypl.org/items/cf0c3ee0-24bd-0133-5e2d-58d385a7b928) +* [Real Estate Loans No. 
13](http://digitalcollections.nypl.org/items/3edf3050-24cd-0133-e6df-58d385a7b928) + + +You may also be interested to explore how we’ve been experimenting with opening up data from other crowdsourcing projects: +* [What's on the Menu?](http://menus.nypl.org/data) +* [Building Inspector](http://buildinginspector.nypl.org/data) diff --git a/project/emigrant/content/pages/data_new/_nav.md b/project/emigrant/content/pages/data_new/_nav.md new file mode 100644 index 000000000..c7568b707 --- /dev/null +++ b/project/emigrant/content/pages/data_new/_nav.md @@ -0,0 +1,4 @@ + * [About](/#/data_new) + * [Browse](/#/data_new/browse) + * [Download](/#/data_new/download) + * [Tips & Tricks](/#/data_new/tips) diff --git a/project/emigrant/content/pages/data_new/download.md b/project/emigrant/content/pages/data_new/download.md new file mode 100644 index 000000000..8ee57ac6c --- /dev/null +++ b/project/emigrant/content/pages/data_new/download.md @@ -0,0 +1,11 @@ +## Download + +Participants have made {{project.classification_count}} contributions to {{project.title}} to date. This project periodically builds a merged, anonymized dump of that data, which is made public here. + +This is a large dataset in JSON format containing all the assertions, confidence ratings, and fields for the {{project.root_subjects_count}} records in {{project.title}}. We are actively gathering contributions to the data set and it is refreshed weekly. The last dump was {{project.latest_export.created_at}}. + +Download Latest Raw Data + +For help interpreting the data, see Scribe WIKI on Data Exports. 
+ +To browse past releases and/or to be notified when new releases are made, you may wish to subscribe to the ATOM Feed of Data Releases diff --git a/project/emigrant/content/pages/data_new/tips.md b/project/emigrant/content/pages/data_new/tips.md new file mode 100644 index 000000000..22b42e93f --- /dev/null +++ b/project/emigrant/content/pages/data_new/tips.md @@ -0,0 +1,48 @@ +## Tips & Tricks + +### Search +* **Use quotation marks to search for full phrases.** +[Search results](http://emigrantcity.nypl.org/?#/data/browse?keyword=margaret%20armstrong) for Margaret Armstrong will display records with Margaret and/or Armstrong in the record. However, [search results](http://emigrantcity.nypl.org/?#/data/browse?keyword=%22margaret%20armstrong%22) for "Margaret Armstrong" display only records where the full name "Margaret Armstrong" appears. + +* **Capitalization does not matter in search.** +"MARGARET," "margaret," and "Margaret" entered into the search box will yield the same results. + +* **Consider abbreviations.** +We asked users to transcribe exactly what they see written on the records including abbreviations. The word Brooklyn may appear as “BKLYN” in one record and “Brooklyn” in another. + +### Reading a Record +The data in these records have been communally created through the Emigrant City participatory project. See the Intro for more information on the steps for publicly and collaboratively creating this resource. As you'll see below, we've tried to be transparent about the collaborative nature of this resource with notations about confidence and status. + +* **Best Data versus All Data** +A record's best data consolidates and lightly cleans all the fields for that record. During the transcription process, the same field may have been marked multiple times and made it all the way through the Scribe work-flow resulting in duplications of that unique field. Best data consolidates these duplications. There is also minimal formatting cleanup. 
For instance, in Best Data, Amount Loaned is represented as a dollar amount rather than just a number. + +* **What is the Source Metadata?** +This page contains technical details related to the transcription interface and includes a link to the high res version of the record page. + +* **What is the confidence field?** +Record fields were created through contributions from many users. As a result, we can gauge how confident we are about each field's accuracy. Fields with a 100% confidence rating are fields for which every transcription was the same. Lower confidence ratings mean that there was disagreement on how best to transcribe a field. + +* **What is a field's status?** +The status of each field is displayed. This corresponds with where the field's transcription is in the work-flow of the project, be it Mark, Transcribe, or Verify. + +* **What are distinct transcriptions?** +Each field is annotated with the number of distinct transcriptions it has received during the project's run. A distinct transcription of 1 means everyone transcribed the same thing for that field. + +* **High res images of the records** +Emigrant City is a project and resource to browse historic mortgage records from the Emigrant Savings Bank. You may view and/or download high resolution images of these records on our Digital Collections site. If you would like to view the high resolution version of a record, navigate to the Source Metadata tab and click on the Page Uri Link. A high resolution image of the record's full page will open in a new tab. + + + + + +* **Which fields appear?** +This project is intended to create an index to enable further discovery. With this project, the goal is to create an index of the bond and mortgage records to enable further discovery and use. 
We’ve worked with experts on the material to scope the data collected to fields which appear with greater regularity across documents: Record Date, Record Number, Mortgager Name, Street Address, Amount Loaned, Valuation, and Dimension & Description. Collecting and verifying these fields from the records creates a resource that opens up the search of these materials. However, the data being gathered is not exhaustive. Rather than creating a complete transcription of each record, the transcribed fields create an index to give some toeholds for future reference and querying of these rich materials which previously were largely invisible and difficult to search. + +* **How often is the data refreshed?** +Contributions are still actively being made to Emigrant City. The records available in browse and download will be refreshed weekly with these new contributions. (You can still contribute to the project by transcribing and verifying data.) + + + diff --git a/project/emigrant/project.json b/project/emigrant/project.json index 9a4f45bf8..ba0284c87 100644 --- a/project/emigrant/project.json +++ b/project/emigrant/project.json @@ -46,6 +46,8 @@ "google_analytics_client_id": "UA-69673163-1" }, + "discuss_url": "http://forum.emigrantcity.nypl.org", + "forum": { "type": "discourse", "base_url": "http://forum.emigrantcity.nypl.org" @@ -58,5 +60,43 @@ {"label": "About", "page": "about"} ], "footer": [] - } + }, + + "export_specs": [ + { + "name": "Record", + "spec_fields": [ + {"name": "Mortgager", "repeats": false}, + {"name": "Street Address", "format": "address", "repeats": false}, + {"name": "Record Date", "format": "date", "format_options": {"range": [1850,1950]}, "repeats": false}, + { + "name": "Land & Building Dimensions", + "format": { + "em_survey_land_dimensions": "dimensions", + "em_survey_building_dimensions": "dimensions" + }, + "repeats": false + }, + {"name": "Amount Loaned", "format": "monetary", "repeats": false}, + {"name": "Stories & Materials", "format": 
{"em_record_stories": "numeric"}, "repeats": false}, + { + "name": "Valuation", + "select": "Total Value", + "repeats": true, + "sub_fields": [ + {"name": "Total Value", "format": "monetary", "repeats": false}, + {"name": "Date", "select": "Valuation Date", "format": "date", "repeats": false}, + { + "name": "Land & Building Value", + "select": "Land & Building Value", + "format": {"em_valuation_ground": "monetary", "em_valuation_building": "monetary"}, + "repeats": false + } + ] + }, + {"name": "Record Number", "format": "numeric", "repeats": false}, + {"name": "Additional Info", "repeats": true} + ] + } + ] } diff --git a/project/emigrant/scripts/query_subjects.rb b/project/emigrant/scripts/query_subjects.rb index a4a8340d7..589ca7ce3 100644 --- a/project/emigrant/scripts/query_subjects.rb +++ b/project/emigrant/scripts/query_subjects.rb @@ -11,7 +11,8 @@ client = NyplRepo::Client.new ENV['DC_API_KEY'] item_uuids = [ - "be6d6300-ecf4-0132-456e-58d385a7b928", # Book 1 (1 to 1,555) http://digitalcollections.nypl.org/items/df712aa0-00b1-0133-fbd7-58d385a7bbd0 +=begin +"be6d6300-ecf4-0132-456e-58d385a7b928", # Book 1 (1 to 1,555) http://digitalcollections.nypl.org/items/df712aa0-00b1-0133-fbd7-58d385a7bbd0 "bf0c1890-ecf4-0132-faa2-58d385a7b928", # Book 2 (1,556 to 2, 721) http://digitalcollections.nypl.org/items/c0c38370-015a-0133-065e-58d385a7bbd0 "bfe9fbe0-ecf4-0132-7e52-58d385a7b928", # Book 3 (2,722 to 3,699) http://digitalcollections.nypl.org/items/5bb969d0-0241-0133-f196-58d385a7b928 "c0921750-ecf4-0132-1737-58d385a7b928", # Book 4 (3,700 to 4,499) http://digitalcollections.nypl.org/items/109c0900-02e7-0133-03cf-58d385a7bbd0 @@ -20,6 +21,8 @@ "c53c32d0-ecf4-0132-b51f-58d385a7b928", # Real Estate Loans No. 9 http://digitalcollections.nypl.org/items/6cf0ed60-23ef-0133-6b54-58d385a7b928 "c5d23760-ecf4-0132-8bed-58d385a7b928", # Real Estate Loans No. 
10 http://digitalcollections.nypl.org/items/59b0a100-23fd-0133-b24f-58d385a7bbd0 "c6697fe0-ecf4-0132-b1fc-58d385a7b928", # Real Estate Loans No. 11 http://digitalcollections.nypl.org/items/cf0c3ee0-24bd-0133-5e2d-58d385a7b928 +=end + "c7d4b670-ecf4-0132-5854-58d385a7b928", # Real Estate Loans No. 11 http://digitalcollections.nypl.org/items/01af2f60-8701-0133-b22c-00505686a51c ] @@ -45,7 +48,7 @@ end end -out_path = "#{File.dirname(File.dirname(__FILE__))}/subjects/subjects_from_api.building.csv" +out_path = "#{File.dirname(File.dirname(__FILE__))}/subjects/subjects_from_api.book14.csv" CSV.open(out_path, "wb") do |csv| csv << subjects.first.keys diff --git a/project/emigrant/workflows/transcribe.json b/project/emigrant/workflows/transcribe.json index 2bb1f9f0d..ffdc47d1b 100644 --- a/project/emigrant/workflows/transcribe.json +++ b/project/emigrant/workflows/transcribe.json @@ -19,7 +19,8 @@ "help": { "file": "t_record_date" }, - "generates_subject_type": "em_transcribed_date" + "generates_subject_type": "em_transcribed_date", + "export_name": "Record Date" }, "em_record_number": { @@ -30,7 +31,8 @@ "help": { "file": "t_record_number" }, - "generates_subject_type": "em_transcribed_record_number" + "generates_subject_type": "em_transcribed_record_number", + "export_name": "Record Number" }, "em_record_mortgager": { @@ -40,7 +42,8 @@ "generates_subject_type": "em_transcribed_mortgager", "help": { "file": "t_record_mortgager" - } + }, + "export_name": "Mortgager" }, "em_record_street_address": { @@ -51,7 +54,8 @@ "generates_subject_type": "em_transcribed_address", "help": { "file": "t_record_street_address" - } + }, + "export_name": "Street Address" }, "em_record_amount_loaned": { @@ -62,7 +66,8 @@ "generates_subject_type": "em_transcribed_amount_loaned", "help": { "file": "t_record_amount_loaned" - } + }, + "export_name": "Amount Loaned" }, "em_record_valuation": { @@ -74,7 +79,8 @@ "file": "t_record_valuation" }, "generates_subject_type": 
"em_transcribed_valuation_date", - "next_task": "em_record_valuation_ground_building" + "next_task": "em_record_valuation_ground_building", + "export_name": "Valuation Date" }, "em_record_valuation_ground_building": { @@ -101,7 +107,8 @@ }, "generates_subject_type": "em_transcribed_valuation_itemized", "instruction": "Sometimes valuations include itemized dollar values for \"ground\" and \"building\". Enter these amounts if you can find them. In the next screen, you'll enter the total valuation.", - "next_task": "em_record_valuation_total" + "next_task": "em_record_valuation_total", + "export_name": "Land & Building Value" }, "em_record_valuation_total": { @@ -113,7 +120,8 @@ "file": "t_record_valuation" }, "generates_subject_type": "em_transcribed_valuation_total", - "next_task": null + "next_task": null, + "export_name": "Total Value" }, "em_record_survey": { @@ -138,7 +146,8 @@ }, "generates_subject_type": "em_transcribed_survey", "instruction": "Enter, as they appear, any land and building dimensions that were recorded. In the next screen, you'll enter the descriptive information.", - "next_task": "em_record_survey_stories_materials" + "next_task": "em_record_survey_stories_materials", + "export_name": "Land & Building Dimensions" }, "em_record_survey_stories_materials": { @@ -163,7 +172,8 @@ }, "generates_subject_type": "em_transcribed_stories_materials", "instruction": "Enter, as they appear, the number of stories and the building materials.", - "next_task": "em_record_survey_additional_info" + "next_task": "em_record_survey_additional_info", + "export_name": "Stories & Materials" }, "em_record_survey_additional_info": { @@ -175,7 +185,8 @@ "file": "t_record_additional_info" }, "generates_subject_type": null, - "next_task": null + "next_task": null, + "export_name": "Additional Info" } } }