diff --git a/Gemfile b/Gemfile index 70b8a49f..6c79e565 100644 --- a/Gemfile +++ b/Gemfile @@ -14,7 +14,7 @@ gem 'omniauth-facebook' gem "omniauth-google-oauth2" gem 'omniauth-zooniverse', '~> 0.0.3' -gem 'mongoid', '~> 4.0.2' +gem 'mongoid' # , '~> 4.0.2' gem 'active_model_serializers' gem 'mongoid-serializer' gem 'rack-cors', :require => 'rack/cors' @@ -38,6 +38,8 @@ gem 'puma', '~> 2.14.0' gem 'logstasher', '~> 0.6' +# gem 'mongoid_fulltext' + group :development do gem 'dotenv-rails' end diff --git a/Gemfile.lock b/Gemfile.lock index 1e5a1a39..c4875856 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -322,7 +322,7 @@ DEPENDENCIES kaminari launchy logstasher (~> 0.6) - mongoid (~> 4.0.2) + mongoid mongoid-rspec (>= 1.6.0)! mongoid-serializer moped diff --git a/app/assets/images/checkmark.png b/app/assets/images/checkmark.png new file mode 100644 index 00000000..feaa040d Binary files /dev/null and b/app/assets/images/checkmark.png differ diff --git a/app/assets/javascripts/components/app-router.cjsx b/app/assets/javascripts/components/app-router.cjsx index ba99b6ce..2ee12f02 100644 --- a/app/assets/javascripts/components/app-router.cjsx +++ b/app/assets/javascripts/components/app-router.cjsx @@ -11,6 +11,8 @@ Verify = require './verify' # TODO Group routes currently not implemented GroupPage = require './group-page' GroupBrowser = require './group-browser' +FinalSubjectSetBrowser = require './final-subject-set-browser' +FinalSubjectSetPage = require './final-subject-set-page' Project = require 'models/project.coffee' @@ -74,6 +76,20 @@ class AppRouter name={workflow.name + '_entire_page'} /> } + + + { if project.downloadable_data + + } + { # Project-configured pages: project.pages?.map (page, key) => - previously_saved = @state.user?.tutorial_complete? + previously_saved = @state.user?.tutorial_complete # Immediately ammend user object with tutorial_complete flag so that we can hide the Tutorial: @setState user: $.extend(@state.user ? 
{}, tutorial_complete: true) diff --git a/app/assets/javascripts/components/final-subject-set-browser.cjsx b/app/assets/javascripts/components/final-subject-set-browser.cjsx new file mode 100644 index 00000000..3e99c5a7 --- /dev/null +++ b/app/assets/javascripts/components/final-subject-set-browser.cjsx @@ -0,0 +1,156 @@ +React = require 'react' +{Navigation} = require 'react-router' +API = require '../lib/api' +Project = require 'models/project.coffee' +GenericButton = require('components/buttons/generic-button') + +module.exports = React.createClass + displayName: 'FinalSubjectSetBrowser' + + mixins: [Navigation] + + getInitialState:-> + entered_keyword: @props.query.keyword + searched_keyword: null + fetching_keyword: null + current_page: 1 + more_pages: false + results: [] + project: null + + componentDidMount: -> + @checkKeyword() + + API.type('projects').get().then (result)=> + @setState project: new Project(result[0]) + + componentWillReceiveProps: (new_props) -> + @checkKeyword new_props + + checkKeyword: (props = @props) -> + if props.query.keyword + @fetch props.query.keyword + + fetch: (keyword, page = 1) -> + return if ! 
@isMounted() + + if keyword != @state.fetching_keyword + + results = @state.results + results = [] if @state.searched_keyword != keyword + @setState fetching_keyword: keyword, fetching_page: page, results: results, () => + per_page = 20 + params = + keyword: keyword + per_page: per_page + page: @state.fetching_page + + API.type('final_subject_sets').get(params).then (sets) => + results = @state.results + offset = (@state.fetching_page-1) * per_page + for s,i in sets + results[i + offset] = s + @setState + results: results + searched_keyword: @props.query.keyword + current_page: @state.fetching_page + fetching_keyword: null + fetching_page: null + more_pages: sets?[0]?.getMeta('next_page') + + handleKeyPress: (e) -> + if @isMounted() + + if [13].indexOf(e.keyCode) >= 0 # ENTER: + @search e.target.value + + search: (keyword) -> + keyword = @refs.search_input?.getDOMNode().value.trim() unless keyword? + + @transitionTo "final_subject_sets", null, {keyword: keyword} + + loadMore: -> + @fetch @state.searched_keyword, @state.current_page + 1 + + handleChange: (e) -> + @setState entered_keyword: e.target.value + + render: -> + return null if ! @state.project? + +
+ + { if ! @state.project.downloadable_data +
+

Data Exports Not Available

+

Sorry, but public data exports are not enabled for this project yet.

+
+ + else +
+ Download Latest Raw Data + + +

Data Exports

+ + { if ! @state.searched_keyword +
+

Download

+ +

Participants have made {@state.project.classification_count.toLocaleString()} contributions to {@state.project.title} to date. This project periodically builds a merged, anonymized dump of that data, which is made public here.

+ +

You can download the latest using the button in the upper-right. For help interpretting the data, see Scribe WIKI on Data Exports.

+ +

Browse

+ +

Preview the data by searching by keyword below:

+
+ } + +
+ + +
+ + { if @state.searched_keyword && @state.results.length == 0 +

No matches yet for "{@state.searched_keyword}"

+ + else if @state.results.length > 0 +
+

Found {@state.results[0].getMeta('total')} matches

+
    + { for set in @state.results + url = "/#/data/exports/#{set.id}?keyword=#{@state.searched_keyword}" + matches = [] + safe_keyword = (w.replace(/\W/g, "\\$&") for w in @state.searched_keyword.toLowerCase().replace(/"/g,'').split(' ')).join("|") + regex = new RegExp("(#{safe_keyword})", 'gi') + for k of set.search_terms_by_field + matches.push(field: k, term: v) for v in set.search_terms_by_field[k] when v.match(regex) +
  • +
    + + + +
    +
    + { for m,i in matches[0...2] + + } +
    +
  • + } +
+ { if @state.more_pages + + } +
+ } +
+ } +
+ diff --git a/app/assets/javascripts/components/final-subject-set-page.cjsx b/app/assets/javascripts/components/final-subject-set-page.cjsx new file mode 100644 index 00000000..4cdcd27e --- /dev/null +++ b/app/assets/javascripts/components/final-subject-set-page.cjsx @@ -0,0 +1,79 @@ +React = require 'react' +API = require '../lib/api' + +GenericButton = require('components/buttons/generic-button') + +module.exports = React.createClass + displayName: 'FinalSubjectSetPage' + + getInitialState:-> + set: null + + componentDidMount: -> + API.type("final_subject_sets").get(@props.params.final_subject_set_id).then (set) => + @setState + set: set + + render: -> + return null if ! @state.set + +
+
+ Download Raw Data +

Set {@state.set.id}

+ +
    + { for subject in @state.set.subjects +
  • + +
      + { + assertions = subject.assertions.sort (a1,a2) -> + if a1.region.y < a2.region.y + -1 + else + 1 + null + } + { for assertion,i in assertions when assertion.name +
    • +

      {assertion.name}

      + +
        + { for k of assertion.data + console.log "assertion data: ", k, assertion.data +
      • + {assertion.data[k]} + { if k != 'value' + ({k.replace /_/g, ' '}) + } +
      • + } +
      +
      +
      Confidence
      +
      {Math.round(100 * assertion.confidence)}%
      +
      Status
      +
      {assertion.status.replace /_/, ' '}
      +
      Distinct Classifications
      +
      {assertion.classifications?.length || 0}
      +
      + { + viewer_width = assertion.region.width + scale = viewer_width / assertion.region.width + s = + background: "url(#{subject.location.standard}) no-repeat -#{Math.round(assertion.region.x * scale)}px -#{Math.round(assertion.region.y * scale)}px" + width: viewer_width + 'px' + height: Math.round(assertion.region.height * scale) + 'px' +
      + } +
    • + } +
    + +
  • + } +
+
+
+ diff --git a/app/assets/javascripts/components/mark/index.cjsx b/app/assets/javascripts/components/mark/index.cjsx index 410306cc..508be0cd 100644 --- a/app/assets/javascripts/components/mark/index.cjsx +++ b/app/assets/javascripts/components/mark/index.cjsx @@ -15,6 +15,7 @@ HideOtherMarksButton = require 'components/buttons/hide-other-marks-button' DraggableModal = require 'components/draggable-modal' Draggable = require 'lib/draggable' {Link} = require 'react-router' +NoMoreSubjectsModal = require 'components/no-more-subjects-modal' module.exports = React.createClass # rename to Classifier displayName: 'Mark' @@ -175,7 +176,7 @@ module.exports = React.createClass # rename to Classifier activeSubjectHelper: null render: -> - return null unless @getCurrentSubjectSet()? && @getActiveWorkflow()? + return null unless @getActiveWorkflow()? currentTask = @getCurrentTask() TaskComponent = @getCurrentTool() @@ -184,9 +185,6 @@ module.exports = React.createClass # rename to Classifier onFirstAnnotation = @state.taskKey == firstTask currentSubtool = if @state.currentSubtool then @state.currentSubtool else @getTasks()[firstTask]?.tool_config.tools?[0] - # direct link to this page - pageURL = "#{location.origin}/#/mark?subject_set_id=#{@getCurrentSubjectSet().id}&selected_subject_id=#{@getCurrentSubject()?.id}" - if currentTask?.tool is 'pick_one' currentAnswer = (a for a in currentTask.tool_config.options when a.value == currentAnnotation.value)[0] @@ -196,13 +194,16 @@ module.exports = React.createClass # rename to Classifier
{ if @state.noMoreSubjectSets - style = marginTop: "50px" -

There is nothing left to do. Thanks for your work and please check back soon!

+ else if @state.notice {@state.notice.message} else if @getCurrentSubjectSet()? + + # direct link to this page + pageURL = "#{location.origin}/#/mark?subject_set_id=#{@getCurrentSubjectSet().id}&selected_subject_id=#{@getCurrentSubject()?.id}" + + header: 'Nothing more to do here' + + propTypes: + project: React.PropTypes.object.isRequired + header: React.PropTypes.string.isRequired + workflowName: React.PropTypes.string.isRequired + + render: -> + next_workflow = @props.project.workflowWithMostActives @props.workflowName + next_href = "/" + if next_workflow? + next_href = "/#/" + next_workflow.name + + } + > + Currently, there are no {@props.project.term('subject')}s for you to {@props.workflowName}. + { if next_workflow? + Try {next_workflow.name.capitalize()} instead! + else + Looks like there's no work do do right now. Please come back later. + } + + diff --git a/app/assets/javascripts/components/subject-viewer.cjsx b/app/assets/javascripts/components/subject-viewer.cjsx index 977a72a8..34cdf9fa 100644 --- a/app/assets/javascripts/components/subject-viewer.cjsx +++ b/app/assets/javascripts/components/subject-viewer.cjsx @@ -40,6 +40,9 @@ module.exports = React.createClass componentWillReceiveProps: (new_props) -> @setUncommittedMark null if new_props.task?.tool != 'pickOneMarkOne' + # Useful for debugging prod: + console.info "SubjectViewer view: #{new_props.subject.id}" if new_props.subject.id != @props.subject.id + if Object.keys(@props.annotation).length == 0 #prevents back-to-back mark tasks, displaying a duplicate mark from previous tasks. @setUncommittedMark null @@ -54,6 +57,16 @@ module.exports = React.createClass @loadImage @props.subject.location.standard window.addEventListener "resize", this.updateDimensions + $(document).keydown (e) => + # Handle keypress + if e.keyCode == 46 + mark = @state.selectedMark + @destroyMark mark if mark? + + # Handle keypress + else if e.keyCode == 13 + @submitMark(@state.uncommittedMark) if @state.uncommittedMark? 
+ scrollToSubject: -> # scroll to mark when transcribing if @props.workflow.name is 'transcribe' @@ -108,10 +121,10 @@ module.exports = React.createClass # VARIOUS EVENT HANDLERS # Commit mark - submitMark: (mark) -> + submitMark: (mark, oncomplete) -> return unless mark? @props.onComplete? mark - @setUncommittedMark null # reset uncommitted mark + @setUncommittedMark null, oncomplete # reset uncommitted mark # Handle initial mousedown: handleInitStart: (e) -> @@ -198,10 +211,11 @@ module.exports = React.createClass mark.belongsToUser = true @setUncommittedMark mark - setUncommittedMark: (mark) -> + setUncommittedMark: (mark, oncomplete) -> @setState uncommittedMark: mark, - selectedMark: mark #, => @forceUpdate() # not sure if this is needed? + selectedMark: mark, () => + oncomplete() if oncomplete? setView: (viewX, viewY, viewWidth, viewHeight) -> @setState {viewX, viewY, viewWidth, viewHeight} @@ -235,7 +249,7 @@ module.exports = React.createClass # First, if we're blurring some other uncommitted mark, commit it: if @state.uncommittedMark? && mark != @state.uncommittedMark - @submitMark sel + @submitMark @state.uncommittedMark, sel else sel() diff --git a/app/assets/javascripts/components/transcribe/index.cjsx b/app/assets/javascripts/components/transcribe/index.cjsx index 3326ec09..5c325675 100644 --- a/app/assets/javascripts/components/transcribe/index.cjsx +++ b/app/assets/javascripts/components/transcribe/index.cjsx @@ -22,6 +22,7 @@ HelpModal = require 'components/help-modal' Tutorial = require 'components/tutorial' DraggableModal = require 'components/draggable-modal' GenericButton = require 'components/buttons/generic-button' +NoMoreSubjectsModal = require 'components/no-more-subjects-modal' module.exports = React.createClass # rename to Classifier displayName: 'Transcribe' @@ -101,7 +102,9 @@ module.exports = React.createClass # rename to Classifier page: @props.query.page render: -> - if @props.params.workflow_id? and @props.params.parent_subject_id? 
+ if @props.query.from == 'verify' + transcribeMode = 'verify' + else if @props.params.workflow_id? and @props.params.parent_subject_id? transcribeMode = 'page' else if @props.params.subject_id transcribeMode = 'single' @@ -113,6 +116,7 @@ module.exports = React.createClass # rename to Classifier else isLastSubject = null currentAnnotation = @getCurrentClassification().annotation + currentAnnotation = @props.query.annotation if @props.query.annotation? TranscribeComponent = @getCurrentTool() # @state.currentTool onFirstAnnotation = currentAnnotation?.task is @getActiveWorkflow().first_task @@ -129,14 +133,8 @@ module.exports = React.createClass # rename to Classifier } { if @state.noMoreSubjects - } - > - Currently, there are no {@props.project.term('subject')}s for you to {@props.workflowName}. Try marking instead! - - - + + else if @getCurrentSubject()? and @getCurrentTask()? - field_keys = (c.value for c of @props.task.tool_config.options) + field_keys = (c.value for c in @props.task.tool_config.options) next_field_key = field_keys[ field_keys.indexOf(@state.active_field_key) + 1 ] if next_field_key? @@ -63,7 +63,9 @@ CompositeTool = React.createClass , => @forceUpdate() else - @commitAnnotation() + # Default to first field key (in case we're repeating this task) + @setState active_field_key: field_keys[0], () => + @commitAnnotation() # User moved focus to an input: handleFieldFocus: (annotation_key) -> @@ -79,6 +81,8 @@ CompositeTool = React.createClass if @props.transcribeMode is 'page' or @props.transcribeMode is 'single' if @props.isLastSubject and not @props.task.next_task? @props.returnToMarking() + else if @props.transcribeMode == 'verify' + @transitionTo 'verify' # this can go into a mixin? 
(common across all transcribe tools) returnToMarking: -> @@ -109,6 +113,8 @@ CompositeTool = React.createClass else if @props.isLastSubject and ( @props.transcribeMode is 'page' or @props.transcribeMode is 'single' ) 'Return to Marking' + else if @props.transcribeMode is 'verify' + 'Return to Verify' else 'Next Entry' buttons.push diff --git a/app/assets/javascripts/components/transcribe/tools/text-tool/index.cjsx b/app/assets/javascripts/components/transcribe/tools/text-tool/index.cjsx index 5dbb2c42..649e00aa 100644 --- a/app/assets/javascripts/components/transcribe/tools/text-tool/index.cjsx +++ b/app/assets/javascripts/components/transcribe/tools/text-tool/index.cjsx @@ -1,4 +1,5 @@ React = require 'react' +{Navigation} = require 'react-router' DraggableModal = require 'components/draggable-modal' SmallButton = require 'components/buttons/small-button' HelpButton = require 'components/buttons/help-button' @@ -7,6 +8,7 @@ IllegibleSubjectButton = require 'components/buttons/illegible-subject-button' TextTool = React.createClass displayName: 'TextTool' + mixins: [Navigation] # load subjects and set state variables: subjects, classification getInitialState: -> annotation: @props.annotation ? {} @@ -119,6 +121,8 @@ TextTool = React.createClass if @props.transcribeMode is 'page' or @props.transcribeMode is 'single' if @props.isLastSubject and not @props.task.next_task? @props.returnToMarking() + else if @props.transcribeMode == 'verify' + @transitionTo 'verify' # Get key to use in annotations hash (i.e. typically 'value', unless included in composite tool) fieldKey: -> @@ -159,6 +163,7 @@ TextTool = React.createClass render: -> return null if @props.loading # hide transcribe tool while loading image + val = @state.annotation[@fieldKey()] val = '' if ! val? 
@@ -235,6 +240,8 @@ TextTool = React.createClass else if @props.isLastSubject and ( @props.transcribeMode is 'page' or @props.transcribeMode is 'single' ) 'Return to Marking' + else if @props.transcribeMode is 'verify' + 'Return to Verify' else 'Next Entry' buttons.push diff --git a/app/assets/javascripts/components/verify/index.cjsx b/app/assets/javascripts/components/verify/index.cjsx index 3e0c2034..89aea3e8 100644 --- a/app/assets/javascripts/components/verify/index.cjsx +++ b/app/assets/javascripts/components/verify/index.cjsx @@ -12,6 +12,8 @@ DraggableModal = require 'components/draggable-modal' GenericButton = require 'components/buttons/generic-button' Tutorial = require 'components/tutorial' HelpModal = require 'components/help-modal' +NoMoreSubjectsModal = require 'components/no-more-subjects-modal' +LoadingIndicator = require('components/loading-indicator') # Hash of core tools: coreTools = require 'components/core-tools' @@ -66,20 +68,19 @@ module.exports = React.createClass # rename to Classifier @setState helping: not @state.helping render: -> + currentAnnotation = @getCurrentClassification().annotation onFirstAnnotation = currentAnnotation?.task is @getActiveWorkflow().first_task
- { if ! @getCurrentSubject()? - - } - > - Currently, there are no {@props.project.term('subject')}s for you to {@props.workflowName}. Try marking instead! - + { if ! @state.subjects? + + + else if ! @getCurrentSubject()? + + else if @getCurrentSubject()? diff --git a/app/assets/javascripts/components/verify/tools/verify-tool/index.cjsx b/app/assets/javascripts/components/verify/tools/verify-tool/index.cjsx index 76235bce..2890d143 100644 --- a/app/assets/javascripts/components/verify/tools/verify-tool/index.cjsx +++ b/app/assets/javascripts/components/verify/tools/verify-tool/index.cjsx @@ -64,6 +64,11 @@ VerifyTool = React.createClass y = data.y + yPad return {x,y} + editAnnotation: (ann) -> + url = "/#/transcribe/#{@props.subject.parent_subject_id}?scrollX=#{window.scrollX}&scrollY=#{window.scrollY}&from=verify" + url += "&" + ("annotation[#{k}]=#{v}" for k,v of ann).join('&') + window.location.href = url + render: -> # return null unless @props.viewerSize? && @props.subject? # return null if ! @props.scale? || ! @props.scale.horizontal? @@ -76,7 +81,6 @@ VerifyTool = React.createClass label = @props.label ? '' buttons = [] - console.info "Verifying subject id #{@props.subject.id}" if @props.onShowHelp? buttons.push @@ -105,7 +109,7 @@ VerifyTool = React.createClass Original prompt: { @props.subject.data.task_prompt } } diff --git a/app/assets/javascripts/lib/fetch-subject-sets-mixin.cjsx b/app/assets/javascripts/lib/fetch-subject-sets-mixin.cjsx index 02caa8f2..df44180b 100644 --- a/app/assets/javascripts/lib/fetch-subject-sets-mixin.cjsx +++ b/app/assets/javascripts/lib/fetch-subject-sets-mixin.cjsx @@ -79,9 +79,12 @@ module.exports = params = {}; params[k] = v for k,v of _params when v? 
API.type('subject_sets').get(params).then (sets) => + if sets.length is 0 + @setState noMoreSubjectSets: true - @setState subjectSets: sets, () => - @fetchSubjectsForCurrentSubjectSet 1, null, callback + else + @setState subjectSets: sets, () => + @fetchSubjectsForCurrentSubjectSet 1, null, callback # PB: Setting default limit to 120 because it's a multiple of 3 mandated by thumb browser fetchSubjectsForCurrentSubjectSet: (page=1, limit=120, callback) -> diff --git a/app/assets/javascripts/lib/mark-button-mixin.cjsx b/app/assets/javascripts/lib/mark-button-mixin.cjsx index b9160d68..e9e7ab78 100644 --- a/app/assets/javascripts/lib/mark-button-mixin.cjsx +++ b/app/assets/javascripts/lib/mark-button-mixin.cjsx @@ -28,12 +28,12 @@ module.exports = markStatus: markStatus locked: '' - checkLocation: ()-> + inTranscribeWorkflow: ()-> pattern = new RegExp('^(#\/transcribe)') pattern.test("#{window.location.hash}") renderMarkButton: -> - return null if @checkLocation() + return null if @inTranscribeWorkflow() 0 && (opt = options[0])? && opt.next_task? + if (options = (c for c in task.tool_config?.options when c.value is @getCurrentClassification()?.annotation?.value)) && options.length > 0 && (opt = options[0])? && opt.next_task? nextKey = opt.next_task else nextKey = @getTasks()[@state.taskKey].next_task diff --git a/app/assets/javascripts/models/project.coffee b/app/assets/javascripts/models/project.coffee index 39dfc7e3..6ea25154 100644 --- a/app/assets/javascripts/models/project.coffee +++ b/app/assets/javascripts/models/project.coffee @@ -7,4 +7,12 @@ class Project term: (t) -> @terms_map[t] ? 
t + workflowWithMostActives: (not_named = '') -> + (w for w in @mostActiveWorkflows() when w.name != not_named)[0] + + mostActiveWorkflows: -> + @workflows.sort (w1, w2) -> + return -1 if w1.active_subjects > w2.active_subjects + 1 + module.exports = Project diff --git a/app/assets/stylesheets/application.styl b/app/assets/stylesheets/application.styl index 727a45f5..0680948c 100644 --- a/app/assets/stylesheets/application.styl +++ b/app/assets/stylesheets/application.styl @@ -32,6 +32,8 @@ @import './groups.styl' @import './group-browser.styl' +@import './final-subject-set-browser.styl' + /* MARK STYLES */ @import './components/mark/point-tool.styl' @import './components/mark/rectangle-tool.styl' diff --git a/app/assets/stylesheets/classify.styl b/app/assets/stylesheets/classify.styl index f435ab2b..9b83edc1 100644 --- a/app/assets/stylesheets/classify.styl +++ b/app/assets/stylesheets/classify.styl @@ -9,6 +9,13 @@ .subject-area flex(grow: 1) + .loading-indicator + width 100px + margin 100px auto + color white + position inherit + display block + .subject-container color: gray diff --git a/app/assets/stylesheets/final-subject-set-browser.styl b/app/assets/stylesheets/final-subject-set-browser.styl new file mode 100644 index 00000000..e0b4f1b0 --- /dev/null +++ b/app/assets/stylesheets/final-subject-set-browser.styl @@ -0,0 +1,111 @@ +.final-subject-set-browser + ul + list-style none + padding-left 0 + + .json-link + width auto + float right + margin-left 10px + + + // Search page: + + form + padding-bottom 1em + + input + vertical-align middle + + input#data-search + font-size 2em + margin-right 10px + + ul.results + li + display inline-block + margin 0 10px 10px 0 + width 400px + height 150px + overflow hidden + + a + color #2b3a42 + text-decoration none + + .image + width 170px + float left + + .matches + width 200px + float left + + .match + padding-bottom 0.5em + line-height 1.5em + + .field + font-weight bold + + &:after + content ":" + + .term + 
padding-left 1em + + em + font-weight bold + color TERTIARY_NORMAL + + + // Set page: + .final-subject-set-page + img.standard-image + max-width 600px + + h3 + clear both + margin-bottom 0 + + .confidence + opacity 0.5 + + &:after + content "% confidence" + + + ul.assertion-data + clear left + + li + color gray + + span.value + font-weight bold + color #2b3a42 + + span.data-key + margin-left 20px + + dl.assertion-properties + clear left + + dt,dd + display inline + color gray + + dt + &:after + content ":" + + dd + margin-left 10px + margin-right 40px + + .image-crop + opacity 0.7 + + &:hover + opacity 1 + diff --git a/app/assets/stylesheets/verify-tool.styl b/app/assets/stylesheets/verify-tool.styl index 488694b0..c96acbae 100644 --- a/app/assets/stylesheets/verify-tool.styl +++ b/app/assets/stylesheets/verify-tool.styl @@ -4,31 +4,62 @@ label margin 1em - ul + & > ul padding-left 0 list-style none text-align left - li + & > li + + position relative + flexbox(flex) + flex-direction(row) + flex-wrap(nowrap) + padding-bottom 1em + + button.edit-button + z-index 2 + opacity 0.6 + padding 0.2em 0.6em + margin-left 10px + + flex(shrink: 0) + + &:hover + opacity 1 + a color black text-decoration none + // display block + flex(grow: 1, shrink: 1) + background-color white + border-radius 6px + + opacity 0.7 ul.choice - margin 1em 0 + z-index 1 + // margin 1em 0 padding 0.5em - - background-color white - border-radius 6px - - opacity 0.7 + list-style none span font-weight bold + &.value + font-family "courier new", "courier", monospace + &.label padding-right 10px font-weight normal - &:hover - opacity 1 + &:hover + opacity 1 + box-shadow 0 0 3px 3px rgba(0,0,0,.1) + + ul + background-image url(/assets/checkmark.png) + background-repeat no-repeat + background-position 96% center + diff --git a/app/controllers/admin/data_controller.rb b/app/controllers/admin/data_controller.rb index 5509f670..2eafd645 100644 --- a/app/controllers/admin/data_controller.rb +++ 
b/app/controllers/admin/data_controller.rb @@ -1,28 +1,16 @@ class Admin::DataController < Admin::AdminBaseController def index - @num_complete = Subject.complete.count - @num_non_root = Subject.active_non_root.count - end - - def download - if params[:download_format] - redirect_to "#{admin_data_download_path}.#{params[:download_format]}?download_status=#{params[:download_status]}" - - else - - if params[:download_status] == 'complete' - @subjects = Subject.complete - respond_to do |format| - format.json {render json: CompleteSubjectsSerializer.new(@subjects)} - end - - else - @sets = SubjectSet.all - respond_to do |format| - format.json {render json: FinalDataSerializer.new(@sets)} + @project = Project.current + if request.post? + if (proj = params[:project]) + if (v = proj[:downloadable_data]) + new_val = v == '1' + puts "updating project: #{new_val} because #{v}" + @project.update_attributes downloadable_data: new_val end end end - end + @export = FinalDataExport.most_recent.first + end end diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index b7395a64..62424948 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -10,6 +10,10 @@ def require_user! current_or_guest_user(create_if_missing = true) end + def get_bot_user_from_request(request) + BotUser.by_auth request.headers + end + # Get currently logged-in user, creating guest as indicated def current_or_guest_user(create_if_missing = false) if current_user diff --git a/app/controllers/classifications_controller.rb b/app/controllers/classifications_controller.rb index 6aa85298..f2e2bc94 100644 --- a/app/controllers/classifications_controller.rb +++ b/app/controllers/classifications_controller.rb @@ -4,29 +4,64 @@ class ClassificationsController < ApplicationController def create - user = require_user! + # Is it a bot? 
+ user = get_bot_user_from_request request - workflow_id = BSON::ObjectId.from_string params["classifications"]["workflow_id"] + user = require_user! if user.nil? + + workflow_id = params["classifications"]["workflow_id"] ? params["classifications"]["workflow_id"] : nil task_key = params["classifications"]["task_key"] annotation = params["classifications"]["annotation"] annotation = {} if annotation.nil? - started_at = params["classifications"]["metadata"]["started_at"] - finished_at = params["classifications"]["metadata"]["finished_at"] + + started_at = nil + finished_at = nil + if params["classifications"]["metadata"] + started_at = params["classifications"]["metadata"]["started_at"] + finished_at = params["classifications"]["metadata"]["finished_at"] + + else + started_at = finished_at = Time.new.strftime("%Y%m%dT%H%M%S%z") + end + subject_id = params["classifications"]["subject_id"] user_agent = request.headers["HTTP_USER_AGENT"] - @result = Classification.create( - workflow_id: workflow_id, - subject_id: subject_id, - location: location, + # If workflow not found by id, maybe it was specified by name? + if workflow_id.nil? && ! params["workflow"].nil? + workflow = Workflow.find_by name: params["workflow"]["name"] + workflow_id = workflow.id + end + + workflow_id = BSON::ObjectId.from_string workflow_id if ! workflow_id.nil? + + # If user is a bot, consider creating the subject on the fly: + if user.is_a?(BotUser) && subject_id.nil? && (standard_url = params["subject"]["location"]["standard"]) + subject_id = Subject.find_or_create_root_by_standard_url(standard_url).id + end + + h = { annotation: annotation, - started_at: started_at, - finished_at: finished_at, - user_agent: user_agent, + location: location, + subject_id: subject_id, task_key: task_key, - user: user - ) + workflow_id: workflow_id, + user_id: user.id + } + if (@result = Classification.find_by_props(h)).nil? 
+ @result = Classification.create( + workflow_id: workflow_id, + subject_id: subject_id, + location: location, + annotation: annotation, + started_at: started_at, + finished_at: finished_at, + user_agent: user_agent, + task_key: task_key, + user: user + ) + end render json: @result end diff --git a/app/controllers/final_data_exports_controller.rb b/app/controllers/final_data_exports_controller.rb new file mode 100644 index 00000000..fbd0057a --- /dev/null +++ b/app/controllers/final_data_exports_controller.rb @@ -0,0 +1,23 @@ +class FinalDataExportsController < FinalDataController + + def latest + puts "FinalDataExport.most_recent.first: #{FinalDataExport.most_recent.first.inspect}" + show FinalDataExport.most_recent.first + end + + def show(export = nil) + export = FinalDataExport.find(params[:id]) unless export + return render text: 'Not found.', status: 404 if export.nil? + + redirect_to export.path + end + + def index + @exports = FinalDataExport.most_recent.limit(20) + + respond_to do |format| + format.atom + end + end + +end diff --git a/app/controllers/final_subject_sets_controller.rb b/app/controllers/final_subject_sets_controller.rb new file mode 100644 index 00000000..bf3e7980 --- /dev/null +++ b/app/controllers/final_subject_sets_controller.rb @@ -0,0 +1,21 @@ +class FinalSubjectSetsController < FinalDataController + respond_to :json + + def show + @set = FinalSubjectSet.find params[:id] + respond_with FinalSubjectSetSerializer.new @set + end + + def index + per_page = get_int :per_page, 20, (0..50) + page = get_int :page, 1 + + keyword = params[:keyword] + + @sets = FinalSubjectSet.page(page).per(per_page) + @sets = @sets.where({"$text" => {"$search" => keyword} } ) if keyword + + respond_with GenericResultSerializer.new(@sets) + end + +end diff --git a/app/controllers/projects_controller.rb b/app/controllers/projects_controller.rb index 4762c095..6a7c7072 100644 --- a/app/controllers/projects_controller.rb +++ b/app/controllers/projects_controller.rb 
@@ -12,14 +12,5 @@ def stats render :json => {:project => project, :stats => project.stats} end -=begin - def project_css - render text: Project.current.styles - end - - def project_js - render text: Project.current.custom_js - end -=end end diff --git a/app/controllers/subjects_controller.rb b/app/controllers/subjects_controller.rb index 8a93cef7..c087ed55 100644 --- a/app/controllers/subjects_controller.rb +++ b/app/controllers/subjects_controller.rb @@ -70,6 +70,7 @@ def show self: url_for(@subject) } @subject = Subject.find subject_id + respond_with SubjectResultSerializer.new(@subject, scope: self.view_context), links: links end diff --git a/app/models/bot_user.rb b/app/models/bot_user.rb new file mode 100644 index 00000000..abbf522c --- /dev/null +++ b/app/models/bot_user.rb @@ -0,0 +1,47 @@ +class BotUser < User + + AUTH_HEADER = 'HTTP_BOT_AUTH' + + # Create bot user with name + def self.create(name) + user = find_or_initialize_by name: name, role: 'bot' + token = '' + if ! user.persisted? + token = user.reset_token! + end + {user: user, token: token} + end + + # Immediately overwrite existing token with a new one + def reset_token! + token = Devise.friendly_token[0,20] + self.password = self.password_confirmation = token + self.email = "#{name}@scribe" + save! validate: false + token + end + + def self.pack_auth_header(user_id, token) + [user_id, token].join ":" + end + + def self.unpack_auth_header(str) + str.split ":" + end + + # Given hash of headers, return bot user if a header authenticates + def self.by_auth(headers) + # No header? Fail. + return nil if headers[AUTH_HEADER].blank? + + # Fail if header doesn't have two values: + parts = unpack_auth_header headers[AUTH_HEADER] + return nil if parts.size != 2 + + # Get user by name and auth using token: + user = find parts[0] + return nil if ! user.valid_password? 
parts[1] + + user + end +end diff --git a/app/models/classification.rb b/app/models/classification.rb index 99f91f1f..93262db1 100644 --- a/app/models/classification.rb +++ b/app/models/classification.rb @@ -11,6 +11,11 @@ class Classification field :finished_at field :user_agent + field :data_md5 + + before_create :generate_data_md5 + + belongs_to :workflow, :foreign_key => "workflow_id" belongs_to :user belongs_to :subject, foreign_key: "subject_id", inverse_of: :classifications @@ -86,9 +91,15 @@ def increment_subject_classification_count end if self.task_key == "flag_bad_subject_task" - subject.increment_flagged_bad_count_by_one - # Push user_id onto Subject.deleting_user_ids if appropriate - Subject.where({id: subject.id}).find_and_modify({"$addToSet" => {deleting_user_ids: user_id.to_s}}) + # If deleting user is creator, immediately change status to bad + if subject.created_solely_by?(user) + subject.bad! + + else + subject.increment_flagged_bad_count_by_one + # Push user_id onto Subject.deleting_user_ids if appropriate + Subject.where({id: subject.id}).find_and_modify({"$addToSet" => {deleting_user_ids: user_id.to_s}}) + end end if self.task_key == "flag_illegible_subject_task" @@ -110,6 +121,26 @@ def to_s "#{workflow_name} Classification (#{ ann.blank? ? 
task_key : ann})" end + def generate_data_md5 # before_create hook: stores the fingerprint that find_by_props looks up + props = { + annotation: annotation, + location: location, + subject_id: subject_id, + task_key: task_key, + workflow_id: workflow_id, user_id: user_id # callers of find_by_props pass user_id, so it must be hashed here too + } + self.data_md5 = self.class.data_md5_for_props(props) + end + + def self.find_by_props(props) + find_by data_md5: data_md5_for_props(props) + end + + def self.data_md5_for_props(props) + Digest::MD5.hexdigest(props.to_query) + end + + # Returns hash mapping distinct values for given field to matching count: def self.group_by_hour(match={}) agg = [] diff --git a/app/models/concerns/group_by_field.rb b/app/models/concerns/group_by_field.rb new file mode 100644 index 00000000..6ca51a41 --- /dev/null +++ b/app/models/concerns/group_by_field.rb @@ -0,0 +1,19 @@ +module GroupByField + extend ActiveSupport::Concern + + module ClassMethods + + # Returns hash mapping distinct values for given field to matching count: + def group_by_field(field, match={}) + puts "group #{collection.inspect} by #{field}" + agg = [] + agg << {"$match" => match } if match + agg << {"$group" => { "_id" => "$#{field.to_s}", count: {"$sum" => 1} }} + collection.aggregate(agg).inject({}) do |h, p| + h[p["_id"]] = p["count"] + h + end + end + + end +end diff --git a/app/models/final_data_export.rb b/app/models/final_data_export.rb new file mode 100644 index 00000000..f85c56a1 --- /dev/null +++ b/app/models/final_data_export.rb @@ -0,0 +1,11 @@ +class FinalDataExport + include Mongoid::Document + include Mongoid::Timestamps + + belongs_to :project + field :path, type: String + field :num_final_subject_sets, type: Integer + + scope :most_recent, -> { order(updated_at: -1) } + +end diff --git a/app/models/final_subject.rb b/app/models/final_subject.rb new file mode 100644 index 00000000..d7deb5a6 --- /dev/null +++ b/app/models/final_subject.rb @@ -0,0 +1,117 @@ +class FinalSubject + include Mongoid::Document + + field :type, type: String + field :location, type: Hash + field :status, type: String + field :width, type:
Integer + field :height, type: Integer + field :meta_data, type: Hash + field :data, type: Hash + field :classifications_breakdown, type: Hash + field :flags, type: Hash + + belongs_to :subject + embedded_in :final_subject_set, inverse_of: :subjects + embeds_many :assertions, class_name: 'FinalSubjectAssertion' + + def fulltext_terms + assertions.select { |assertion| ! assertion.data.blank? && assertion.created_in_workflow != 'mark' }.map { |assertion| assertion.data.values }.select { |v| ! v.empty? } + end + + def fulltext_terms_by_field # maps assertion name => non-empty data values of non-'mark' assertions + assertions.select { |assertion| ! assertion.data.blank? && assertion.created_in_workflow != 'mark' }.inject({}) do |h, a| + puts "collected: #{a.name}" + h[a.name] = [] if h[a.name].nil? + h[a.name] += a.data.values.select { |v| ! v.empty? } # append so assertions sharing a name do not overwrite each other + h + end + end + + def self.create_from_subject(subject) + inst = self.new subject: subject + [:type, :location, :status, :width, :height, :meta_data].each do |p| + inst.send("#{p}=", subject.send(p)) + end + + inst.build_assertions! + # inst.build_classifications_breakdown! + # inst.build_data! + + inst + end + + def build_data! + distinct = assertions.inject({}) do |h, assertion| + if assertion.created_in_workflow != 'mark' + h[assertion.task_key] = [] if h[assertion.task_key].nil? + data = assertion.data + data = data["values"].first if ! data["values"].nil? + data = data["value"] if data["value"] + stmt = {value: data, label: assertion.instructions['transcribe']} + has_data = ! data.blank? + has_data &= ! data.values.select { |v| ! v.blank? }.empty? if data.is_a? Hash + h[assertion.task_key] << stmt if has_data && ! h[assertion.task_key].include?(stmt) + end + h + end + self.data = distinct + end + + def build_assertions! + assertions.destroy_all + + flattened_subjects(subject.child_subjects).each do |s| + assertions << FinalSubjectAssertion.create_from_subject(s[:subject], s[:parents]) + end + + self + end + + def build_classifications_breakdown!
+ all_classifications = [] + @all_subjects.each do |s| + all_classifications += s.classifications + end + self.classifications_breakdown = all_classifications.inject({}) { |h, c| h[c.task_key] ||= 0; h[c.task_key] += 1; h } + self.classifications_breakdown[:total] = subject.classifications.count + end + + def flags + { + complete: flagged_for_retirement, + bad: { + votes_in_favor: subject.flagged_bad_count || 0 + } + } + end + + def flagged_for_retirement + votes = subject.number_of_completion_assessments + h = { + votes_in_favor: subject.retire_count || 0, + total_votes: votes, + } + h[:percentage_in_favor] = subject.retire_count / votes.to_f if ! subject.retire_count.nil? && votes > 0 + h + end + + def flattened_subjects(subjects, parents = []) + @all_subjects ||= [] + @all_subjects += subjects + + ret = [] + subjects.each do |s| + next if ! s.parent_classifications.empty? && s.parent_classifications.limit(1).first.task_key == 'completion_assessment_task' + + if s.child_subjects.size > 0 + ret += flattened_subjects(s.child_subjects, parents + [s]) + + else + # ret << FinalSubjectAssertionSerializer.new(subject: s, parents: parents) + ret << {subject: s, parents: parents} if s.status != 'bad' + end + end + ret + end +end diff --git a/app/models/final_subject_assertion.rb b/app/models/final_subject_assertion.rb new file mode 100644 index 00000000..9fcb50af --- /dev/null +++ b/app/models/final_subject_assertion.rb @@ -0,0 +1,122 @@ +class FinalSubjectAssertion + include Mongoid::Document + + field :name, type: String + field :status, type: String + field :created_in_workflow, type: String + field :confidence, type: Float + field :data, type: Hash + field :versions, type: Array + field :region, type: Hash + field :task_key, type: String + field :instructions, type: Hash + + embedded_in :final_subject, inverse_of: :assertions + + def self.create_from_subject(subject, parents) + inst = new + + inst.name = subject.export_name + inst.status = status_for_subject(subject) 
+ inst.created_in_workflow = subject.parent_workflow.nil? ? nil : subject.parent_workflow.name + inst.confidence = confidence_for_subject(subject) + inst.data = data_for_subject(subject) + inst.versions = classifications_for_subject(subject) + inst.region = region_for_subject(subject) + inst.task_key = subject.parent_classifications.empty? ? nil : subject.parent_classifications.limit(1).first.task_key + inst.instructions = instructions_for_subject(subject, parents) + + inst + end + + def self.classifications_for_subject(subject) + # Hack to show all distinct classifications with counts for terminal subjects being transcribed: + # if object[:subject].parent_workflow.name == 'transcribe' + + annotations_with_confidence subject if ! subject.parent_workflow.nil? && subject.parent_workflow.name != 'mark' + end + + def self.instructions_for_subject(subject, parents) + ret = {} + + parents.each do |s| + next if s.parent_workflow.nil? + + if s.parent_workflow.name == 'mark' && subject.region[:label] + ret[s.parent_workflow.name] = subject.region[:label] + + else + ret[s.parent_workflow.name] = s.parent_workflow_task.instruction + end + end + ret[subject.parent_workflow.name] = subject.parent_workflow_task.instruction if ! subject.parent_workflow.nil? + ret + end + + def self.region_for_subject(subject) + region = subject.region + return nil if region.nil? + + # not important: + region.delete 'color' + + # Translate toolName to generic 'shape' name: + region[:shape] = case region[:toolName] + when 'rectangleTool','rowTool' then 'rectangle' + when 'pointTool' then 'point' + end + region.delete 'toolName' + + region + end + + def self.data_for_subject(subject) + data = nil + if ['complete','retired'].include? subject.status + data = subject.data + else + cl = annotations_with_confidence(subject).first + data = cl.nil? ? 
nil : cl[:data] + end + data = data['values'].first if data && data['values'] + + data + end + + def self.confidence_for_subject(subject) + if subject.status == 'complete' + 1 + elsif subject.status == 'retired' + 1 + else + annotations_with_confidence(subject).map { |a| a[:confidence] }.max + end + end + + def self.status_for_subject(subject) + return nil if subject.parent_workflow.nil? + + return 'complete' if subject.status == 'complete' + + if subject.parent_workflow.name == 'transcribe' + return 'awaiting_transcriptions' if subject.status == 'inactive' + return 'awaiting_votes' if subject.status == 'active' + + elsif subject.parent_workflow.name == 'verify' + return 'awaiting_votes' if subject.status == 'inactive' + end + + subject.status + end + + + def self.annotations_with_confidence(subject) # distinct parent annotations with vote counts, sorted by descending confidence + num_votes = [subject.parent_workflow.nil? ? 3 : subject.parent_workflow.generates_subjects_after, subject.parent_classifications.count].max + grouped = subject.parent_classifications.inject({}) { |h, c| h[c.annotation] ||= 0; h[c.annotation] += 1; h } + classifications_by_annotation = subject.parent_classifications.inject({}) { |h, c| h[c.annotation] ||= []; h[c.annotation] << {created: c.created_at, user_id: c.user_id.to_s, duration: c.finished_at.to_time - c.started_at.to_time }; h } + grouped = grouped.inject([]) { |a,(annotation,count)| a << {data: annotation, votes: count, confidence: count.to_f / num_votes, instances: classifications_by_annotation[annotation] }; a } + grouped = grouped.sort_by { |a| - a[:confidence] } + grouped + end + +end diff --git a/app/models/final_subject_set.rb b/app/models/final_subject_set.rb new file mode 100644 index 00000000..47c28b09 --- /dev/null +++ b/app/models/final_subject_set.rb @@ -0,0 +1,82 @@ +class FinalSubjectSet + include Mongoid::Document + include Mongoid::Timestamps + # include Mongoid::FullTextSearch + + belongs_to :project + belongs_to :subject_set + field :name, type: String + field :meta_data,
type: Hash + + field :search_terms + field :search_terms_by_field + + index({"subjects.assertions.confidence" => 1}, {background: true}) + index({"subjects.assertions.task_key" => 1}, {background: true}) + index({"subject_set_id" => 1}, {background: true}) + index({"project_id" => 1}, {background: true}) + + index({"search_terms" => "text"}) + # can't create two... + # index({"search_terms_by_field" => "text"}) + + [:total, :complete, :awaiting_votes, :in_progress, :awaiting_transcriptions].each do |field| + index({"subjects.assertions_breakdown.all_workflows.#{field}" => 1}, {background: true}) + end + + embeds_many :subjects, class_name: 'FinalSubject' + + # fulltext_search_in :fulltext_terms + + def build_search_terms + update_attributes({ + search_terms: compute_fulltext_terms, + search_terms_by_field: compute_fulltext_terms_by_field + }) + end + + def compute_fulltext_terms + compute_fulltext_terms_by_field.values.flatten.uniq + end + + def compute_fulltext_terms_by_field + subjects.map { |subject| subject.fulltext_terms_by_field }.inject({}) do |h, terms| + terms.each do |(k,vs)| + h[k] = [] if h[k].nil? + h[k] += vs + end + h + end + end + + def self.assert_for_set(set, rebuild=false) + # If final_subject_set record was built after most recent generated subject, consider skipping + if ! rebuild && (final_ss = find_by(subject_set:set)) + subjs_updated = set.subjects.max(:updated_at) + return if subjs_updated && final_ss.updated_at > subjs_updated # nil when the set has no subjects; then fall through and rebuild + end + inst = find_or_create_by subject_set: set + inst.project = set.project + inst.meta_data = set.meta_data + inst.update_subjects + inst.build_search_terms + inst.save!
+ end + + def update_subjects + + subjects.destroy_all + + subject_set.subjects.root.each do |subject| + subjects << FinalSubject.create_from_subject(subject) + end + end + + def self.rebuild_indexes(for_project) + collection.indexes.drop + for_project.export_names.each do |(key,name)| + index({"search_terms_by_field.#{key}" => 1}, {background: true}) + end + create_indexes + end +end diff --git a/app/models/project.rb b/app/models/project.rb index 6bfc3e58..410a2a82 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -18,10 +18,10 @@ class Project field :pages, type: Array, default: [] field :menus, type: Hash, default: {} field :partials, type: Hash, default: {} - field :logo, type: String - field :background, type: String - field :favicon, type: String - field :forum, type: Hash + field :logo, type: String, default: nil + field :background, type: String, default: nil + field :favicon, type: String, default: nil + field :forum, type: Hash, default: nil field :feedback_form_url, type: String field :discuss_url, type: String field :blog_url, type: String @@ -29,12 +29,13 @@ class Project field :styles, type: String field :custom_js, type: String field :admin_email, type: String - field :team_emails, type: Array + field :team_emails, type: Array, default: [] field :metadata_search, type: Hash field :tutorial, type: Hash field :terms_map, type: Hash, default: {} # Hash mapping internal terms to project appropriate terms (e.g. 'group'=>'ship') field :status, type: String, default: 'inactive' - field :analytics, type: Hash + field :analytics, type: Hash, default: nil + field :downloadable_data, type: Boolean # 10.27.15 until we can sort out a better time to call this method, lets comment it out. 
include CachedStats @@ -44,6 +45,7 @@ class Project has_many :subject_sets has_many :workflows, dependent: :destroy, order: "order ASC" has_many :subjects + has_many :final_subject_sets scope :most_recent, -> { order(updated_at: -1) } scope :active, -> { where(status: 'active') } @@ -63,6 +65,21 @@ def self.current active.first end + # get Distinct export_names from all workflow_tasks + def export_names + workflows.inject([]) do |a, w| + a += w.tasks.map { |t| t.export_name } + + end.select do |n| + ! n.nil? + + end.inject({}) do |h, name| + key = name.gsub(' ', '-').gsub(/[^A-Za-z0-9-]/, '') + h[key] = name + h + end + end + def calc_stats # amount of days to calculate statistics for range_in_days = 7 diff --git a/app/models/subject.rb b/app/models/subject.rb index f309300b..0f826159 100644 --- a/app/models/subject.rb +++ b/app/models/subject.rb @@ -25,7 +25,7 @@ class Subject field :type, type: String, default: "root" #options: "root", "secondary" field :status, type: String, default: "active" #options: "active", "inactive", "bad", "retired", "complete", "contentious" - field :meta_data, type: Hash + field :meta_data, type: Hash, default: {} field :classification_count, type: Integer, default: 0 field :random_no, type: Float field :secondary_subject_count, type: Integer, default: 0 @@ -74,7 +74,12 @@ class Subject index({"type" => 1, "subject_set_id" => 1}, {background: true}) # Index for fetching child subjects for a parent subject, optionally filtering by region NOT NULL index({parent_subject_id: 1, status: 1, region: 1}) - + + def created_solely_by?(user) + created_by = created_by_user_id == user.id.to_s + created_by ||= creating_user_ids.size == 1 && creating_user_ids.first == user.id.to_s + created_by + end def thumbnail location['thumbnail'].nil? ? location['standard'] : location['thumbnail'] @@ -117,6 +122,13 @@ def parent_workflow_task end end + def export_name + return nil if parent_workflow.nil? 
+ + transcribe_subject = parent_workflow.name == 'transcribe' ? self : parent_subject + transcribe_subject.parent_workflow_task.export_name if transcribe_subject && transcribe_subject.parent_workflow_task + end + # find all the classifications for subject where task_key == compleletion_assesment_task # calculate the percetage vote for retirement (pvr) # if pvr is equal or greater than retire_limit, set self.status == retired. @@ -131,16 +143,20 @@ def check_flagged_bad_count # calculate the percetage vote for retirement (pvr) # if pvr is equal or greater than retire_limit, set self.status == retired. def check_retire_by_vote - assesment_classifications = classifications.where(task_key: "completion_assessment_task").count - if assesment_classifications > 2 - percentage_for_retire = retire_count / assesment_classifications.to_f - if percentage_for_retire >= workflow.retire_limit - self.retire! - increment_parents_subject_count_by -1 if parent_subject - end + if number_of_completion_assessments > 2 && percentage_for_retire >= workflow.retire_limit + increment_parents_subject_count_by -1 if self.retire! && parent_subject end end + def percentage_for_retire + assesment_classifications = number_of_completion_assessments + retire_count.to_f / assesment_classifications.to_f + end + + def number_of_completion_assessments + classifications.where(task_key: "completion_assessment_task").count || 0 + end + def bad! status! 'bad' @@ -149,12 +165,13 @@ def bad! end def retire! - return if status == "bad" - return if classifying_user_ids.length < workflow.retire_limit + return false if status == "bad" + return false if classifying_user_ids.length < workflow.retire_limit + status! 'retired' subject_set.subject_completed_on_workflow(workflow) if ! workflow.nil? - - # subject_set.inc_complete_secondary_subject 1 if type != 'root' + + true end def activate! @@ -163,7 +180,7 @@ def activate! 
# subject_set.inc_active_secondary_subject 1 if type != 'root' end - def calculate_most_popular_parent_classification + def parent_classifications_grouped annotations = parent_classifications.map { |c| c.annotation } buckets = annotations.inject({}) do |h, ann| h[ann] ||= 0 @@ -171,7 +188,50 @@ def calculate_most_popular_parent_classification h end buckets = buckets.sort_by { |(k,v)| - v } - buckets.map { |(k,v)| {ann: k, percentage: v.to_f / parent_classifications.count } }.first + buckets.map { |(k,v)| {ann: k, percentage: v.to_f / parent_classifications.count } } + end + + def parent_and_descendent_classifications_grouped + # Take peer classifications... + classification_weights = parent_classifications_grouped_with_counts + # and descendent classifications (those made upon child subjects) ... + sub_classification_weights = classifications_grouped_with_counts + + # and combine them into a single hash mapping distinct annotations to vote counts: + combined_weights = classification_weights + total = 0 + classification_weights.keys.each do |k| + combined_weights[k] += sub_classification_weights[k] if sub_classification_weights[k] + total += combined_weights[k] + end + + combined_weights = combined_weights.sort_by { |(k,v)| - v } + combined_weights.map { |(k,v)| {ann: k, percentage: v.to_f / total, votes: v } } + end + + def parent_classifications_grouped_with_counts + self.class.classifications_grouped_with_counts parent_classifications + end + + def classifications_grouped_with_counts + self.class.classifications_grouped_with_counts classifications + end + + def self.classifications_grouped_with_counts(classifications) + classifications.inject({}) do |h, classification| + ann = classification.annotation.except(:key, :tool, :generates_subject_type) + h[ann] = 0 if h[ann].nil? 
+ h[ann] += 1 + h + end + end + + def calculate_most_popular_parent_classification + parent_classifications_grouped.first + end + + def parent_workflow + parent_classifications.limit(1).first.workflow if ! parent_classifications.empty? end @@ -204,6 +264,34 @@ def self.group_by_field_for_group(group, field, match={}) end + def self.find_or_create_root_by_standard_url(standard_url) + subject = Subject.find_by type: 'root', "location.standard" => standard_url + if subject.nil? + subject = Subject.create_root_for_url standard_url + end + subject + end + + def self.create_root_for_url(standard_url) + + require 'fastimage' + width, height = FastImage.size(standard_url,:raise_on_failure=>false, :timeout=>10.0) + + subject = Subject.create({ + type: 'root', + subject_set: SubjectSet.create({project: Project.current, group: Project.current.groups.first, state: 'active'}), + location: { + standard: standard_url + }, + width: width, + height: height + }) + subject.workflow = Workflow.find_by name: 'mark' + subject.activate! + subject + end + + private def status!(status) diff --git a/app/models/subject_generation_method.rb b/app/models/subject_generation_method.rb index 9faabbbc..caf5d9dc 100644 --- a/app/models/subject_generation_method.rb +++ b/app/models/subject_generation_method.rb @@ -47,6 +47,10 @@ def subject_attributes_from_classification(classification) if (label = task.tool_label(classification)) region[:label] = label end + # If region.color not passed from client, derive it from workflow_task tool config: + if ! 
region[:color] && task.sub_tool_config(classification) + region[:color] = task.sub_tool_config(classification)[:color] + end { parent_subject: classification.subject, diff --git a/app/models/subject_generation_methods/collect_unique.rb b/app/models/subject_generation_methods/collect_unique.rb index b0c685f8..7aeb6934 100644 --- a/app/models/subject_generation_methods/collect_unique.rb +++ b/app/models/subject_generation_methods/collect_unique.rb @@ -7,22 +7,27 @@ def process_classification(classification) atts = subject_attributes_from_classification(classification) atts[:status] = 'inactive' - classification.child_subject = Subject.find_or_initialize_by(workflow: atts[:workflow], parent_subject: atts[:parent_subject], type: atts[:type]) + classification.child_subject = Subject.find_or_create_by(workflow: atts[:workflow], parent_subject: atts[:parent_subject], type: atts[:type], subject_set: classification.subject.subject_set) classification.save - ann = classification.annotation.except(:key, :tool, :generates_subject_type) - # Collect unique annotations into data hash + classifications = nil if classification.child_subject.persisted? - values = classification.child_subject.data['values'].nil? ? [] : classification.child_subject.data['values'] - values.push ann unless values.include? ann - - atts[:data] = {'values' => values} + values = classification.child_subject.data.nil? || classification.child_subject.data['values'].nil? ? 
[] : classification.child_subject.data['values'] + classifications = classification.child_subject.parent_classifications else - atts[:data] = {'values' => [ann]} + classifications = [classification] end + + # Compute vote counts based on all transcriptions and all votes cast: + combined_weights = classification.child_subject.parent_and_descendent_classifications_grouped + + # Store most common 3 cause any more is probably too many to review: + vals = combined_weights[(0...3)].map { |a| a[:ann] } + atts[:data] = { "values" => vals } + atts[:data][:task_prompt] = classification.workflow_task.instruction # Don't update attributes already saved/initialized in subject: @@ -34,7 +39,8 @@ def process_classification(classification) if num_parent_classifications >= classification.workflow.generates_subjects_after # Get number of distinct classifications: - num_vals = classification.child_subject.data['values'].nil? ? -1 : classification.child_subject.data['values'].size + # num_vals = classification.child_subject.data['values'].nil? ? -1 : classification.child_subject.data['values'].size + num_vals = atts[:data]['values'].size # Where will this generated subject appear, if anywhere? next_workflow = classification.child_subject.workflow @@ -49,13 +55,13 @@ def process_classification(classification) verify_method = next_workflow.generates_subjects_method # If next workflow's generation method is most-popular and everyone transcribed the same thing, auto upgrade to 'complete': - if num_vals == 1 && verify_method == 'most-popular' + # (but only if num_parent_classifications > 1) + if num_vals == 1 && verify_method == 'most-popular' && num_parent_classifications > 1 atts[:status] = 'complete' # .. Otherwise, activate the generated subject into the next workflow: else - classification.child_subject.activate! 
- atts.delete :status + atts[:status] = 'active' end end end @@ -68,9 +74,11 @@ def process_classification(classification) atts[:creating_user_ids] ||= [] classification.child_subject.creating_user_ids.push classification.user_id - # puts "Saving atts to classification: #{atts.inspect}" classification.child_subject.update_attributes atts + # Now that child subj is saved (with a parent subject_set) Fire activate hooks if activating: + classification.child_subject.activate! if atts[:status] == 'active' + classification.child_subject end diff --git a/app/models/subject_generation_methods/most_popular.rb b/app/models/subject_generation_methods/most_popular.rb index f851e28b..e4fd1cc2 100644 --- a/app/models/subject_generation_methods/most_popular.rb +++ b/app/models/subject_generation_methods/most_popular.rb @@ -12,7 +12,11 @@ def process_classification(classification) ann = classification.annotation - most_popular = classification.child_subject.calculate_most_popular_parent_classification + # Get most popular annotation in all classifications that are siblings to + # this classification as well as classifications made upon this classification's + # generated subject (effectively combine transcriptions and verify votes) + weights = classification.subject.parent_and_descendent_classifications_grouped + most_popular = weights.first atts[:data] = most_popular[:ann] num_parent_classifications = classification.child_subject.parent_classifications.count diff --git a/app/models/user.rb b/app/models/user.rb index 00258c5f..11baa88d 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -36,7 +36,7 @@ class User field :profile_url, :type => String # URI of user profile, if any field :status, :type => String, :default => 'active' - field :role, :type => String, :default => 'user' # user, admin, team + field :role, :type => String, :default => 'user' # user, admin, team, bot field :guest, :type => Boolean, :default => false field :tutorial_complete, :type => Boolean,
:default => false @@ -221,5 +221,4 @@ def self.group_by_hour(match={}) h end end - end diff --git a/app/models/workflow.rb b/app/models/workflow.rb index 7a380337..dba27ead 100644 --- a/app/models/workflow.rb +++ b/app/models/workflow.rb @@ -6,7 +6,7 @@ class Workflow field :key, type: String field :label, type: String field :first_task, type: String - field :retire_limit, type: Integer, default: 3 + field :retire_limit, type: Float, default: 0.75 field :subject_fetch_limit, type: Integer, default: 10 field :generates_subjects, type: Boolean, default: true field :generates_subjects_after, type: Integer, default: 0 @@ -19,7 +19,9 @@ class Workflow # this `false` to prevent a user's transcriptions from being verified by same # user: field :subjects_classifiable_by_creator, type: Boolean, default: true - field :active_subjects, type: Integer, default: 0 + # Controls whether the user-generated subject shown may be "edited" (cloned, really) + # Currently only implemented as an EDIT button in Verify (user can transcribe using a prev transcription as a basis) + field :subjects_editable, type: Boolean, default: false field :order, type: Integer, default: 0 has_many :subjects @@ -30,6 +32,10 @@ class Workflow index project_id: 1, name: 1 + def active_subjects + subjects.active.count + end + def subject_has_enough_classifications(subject) subject.classification_count >= self.generates_subjects_after end diff --git a/app/models/workflow_task.rb b/app/models/workflow_task.rb index 81a743d3..7d10d6ba 100644 --- a/app/models/workflow_task.rb +++ b/app/models/workflow_task.rb @@ -10,6 +10,7 @@ class WorkflowTask field :next_task, type: String field :help, type: Hash field :examples, type: Array + field :export_name, type: String embedded_in :workflow diff --git a/app/serializers/final_data_serializer.rb b/app/serializers/final_data_serializer.rb deleted file mode 100644 index e9256d0b..00000000 --- a/app/serializers/final_data_serializer.rb +++ /dev/null @@ -1,19 +0,0 @@ -class 
FinalDataSerializer < ActiveModel::MongoidSerializer - attributes :data, :links, :meta - - root false - - def data - options = serialization_options.merge({root: false}) - object.map { |s| FinalDataSubjectSetSerializer.new(s, root: false) } - end - - def meta - { - } - end - - def links - {} - end -end diff --git a/app/serializers/final_data_subject_serializer.rb b/app/serializers/final_data_subject_serializer.rb deleted file mode 100644 index a2573cfa..00000000 --- a/app/serializers/final_data_subject_serializer.rb +++ /dev/null @@ -1,85 +0,0 @@ -class FinalDataSubjectSerializer < ActiveModel::MongoidSerializer - - attributes :id, :type, :location, :region, :width, :height, :meta_data - attributes :data # , :task - attributes :classification_count - attributes :generated_in_workflow - attributes :child_subjects - attributes :transcription_classifications - - def attributes - data = super - - # For brevity, remove attributes that are redundant or always null: - - if data[:type] == 'root' - # Root subjects don't have data: - data.delete :data - data.delete :generated_in_workflow - - else - # All of these are inherited from parent subject, so remove: - data.delete :location - data.delete :width - data.delete :height - data.delete :meta_data - end - - if data[:generated_in_workflow] == 'mark' - # Mark subjects have roughly same info in :data and :region so keep :region - data.delete :data - else - # .. For all other child subjects, delete :region since it's avail in parent - data.delete :region - end - data.delete :transcription_classifications if data[:transcription_classifications].empty? - data.delete :child_subjects if data[:child_subjects].empty? - - data - end - - def generated_in_workflow - return nil if object.parent_subject.nil? 
- puts "parent subj: #{object}" - object.parent_subject.classifications.first.workflow.name - end - - def child_subjects - object.child_subjects.map { |s| FinalDataSubjectSerializer.new(s, root: false) } - end - - def task - return nil if object.parent_workflow_task.nil? - - task = object.parent_workflow_task - { - instruction: task.instruction, - help: task.help, - tool: task.tool, - tool_config: task.tool_config - } - end - - def classification_count - object.classifications.count - end - - def id - object._id.to_s - end - - def include_data? - ! object.data.nil? - end - - def include_task? - ! object.parent_workflow_task.nil? - end - - def transcription_classifications - transcribe_workflow_id = Workflow.where(name:"transcribe").to_a[0]._id - transcription_classifications = object.classifications.where( {workflow_id: transcribe_workflow_id} ).to_a - object.classifications.where( {workflow_id: transcribe_workflow_id} ).map{ |c| FinalClassificationSerializer.new(c, root: false) } - end - -end diff --git a/app/serializers/final_data_subject_set_serializer.rb b/app/serializers/final_data_subject_set_serializer.rb deleted file mode 100644 index ebc04924..00000000 --- a/app/serializers/final_data_subject_set_serializer.rb +++ /dev/null @@ -1,17 +0,0 @@ -class FinalDataSubjectSetSerializer < ActiveModel::MongoidSerializer - - attributes :id - attributes :name - attributes :meta_data - attributes :classification_count - attributes :subjects - - def subjects - object.subjects.root.map { |s| FinalDataSubjectSerializer.new(s, root: false) } - end - - def id - object._id.to_s - end - -end diff --git a/app/serializers/final_subject_assertion_serializer.rb b/app/serializers/final_subject_assertion_serializer.rb new file mode 100644 index 00000000..0e223109 --- /dev/null +++ b/app/serializers/final_subject_assertion_serializer.rb @@ -0,0 +1,19 @@ +class FinalSubjectAssertionSerializer < ActiveModel::MongoidSerializer + + attributes :id, :status + attributes :name + attributes 
:created_in_workflow + attributes :confidence + attributes :data + attributes :versions + attributes :region + attributes :task_key + attributes :instructions + + root false + + def id + object.id.to_s + end + +end diff --git a/app/serializers/final_subject_serializer.rb b/app/serializers/final_subject_serializer.rb new file mode 100644 index 00000000..95f13ca4 --- /dev/null +++ b/app/serializers/final_subject_serializer.rb @@ -0,0 +1,11 @@ +class FinalSubjectSerializer < ActiveModel::MongoidSerializer + + attributes :id, :type, :location, :status, :width, :height, :meta_data + has_many :assertions + + # scope :by_keyword, -> (keyword) { where(: keyword) } + + def id + object.id.to_s + end +end diff --git a/app/serializers/final_subject_set_serializer.rb b/app/serializers/final_subject_set_serializer.rb new file mode 100644 index 00000000..aeca277c --- /dev/null +++ b/app/serializers/final_subject_set_serializer.rb @@ -0,0 +1,14 @@ +class FinalSubjectSetSerializer < ActiveModel::MongoidSerializer + + attributes :id, :meta_data, :type, :search_terms_by_field + + has_many :subjects + + def id + object.id.to_s + end + + def type + 'final_subject_set' + end +end diff --git a/app/serializers/generic_result_serializer.rb b/app/serializers/generic_result_serializer.rb new file mode 100644 index 00000000..7990790f --- /dev/null +++ b/app/serializers/generic_result_serializer.rb @@ -0,0 +1,42 @@ +# Generic serializer for arrays of objects of arbitrary types +# Produces JSONAPI style results with pagination meta +class GenericResultSerializer < ActiveModel::MongoidSerializer + attributes :data, :links, :meta + + root false + + # This serializes both single objects and arrays of objects, so data should output either a hash or an array respectively: + def data + options = serialization_options.merge({root: false, scope: scope}) + + # Array of results? + if object.respond_to? :each + return [] if object.empty? 
+ + # Determine what serializer to use based on class of first item (constant lookup, not eval): + klass = object.first.class.to_s + serializer_class = Object.const_get("#{klass}Serializer") + object.map { |s| serializer_class.new(s, options) } + + else + # Single object: determine what serializer to use based on its own class: + klass = object.class.to_s + serializer_class = Object.const_get("#{klass}Serializer") + serializer_class.new(object, options) + end + end + + def meta + { + current_page: object.current_page, + next_page: object.next_page, + prev_page: object.prev_page, + total_pages: object.total_pages, + total: object.count + } if object.respond_to? :current_page + end + + def links + serialization_options[:links] + end +end diff --git a/app/serializers/project_serializer.rb b/app/serializers/project_serializer.rb index b12ec002..6b129840 100644 --- a/app/serializers/project_serializer.rb +++ b/app/serializers/project_serializer.rb @@ -1,20 +1,16 @@ class ProjectSerializer < ActiveModel::MongoidSerializer - attributes :id, :title, :short_title, :summary, :home_page_content, :organizations , :team, :pages, :menus, :partials, :logo, :background, :workflows, :forum, :tutorial, :feedback_form_url, :metadata_search, :terms_map, :blog_url, :discuss_url, :privacy_policy + attributes :id, :title, :short_title, :summary, :home_page_content, :organizations , :team, :pages, :menus, :partials, :logo, :background, :workflows, :forum, :tutorial, :feedback_form_url, :metadata_search, :terms_map, :blog_url, :discuss_url, :privacy_policy, :downloadable_data + attributes :classification_count has_many :workflows # delegate :current_or_guest_user, to: :scope - def id - object._id.to_s + def classification_count + Classification.count end -=begin - def current_user_tutorial - user = scope.nil? ? 
nil : current_or_guest_user - unless user == nil - user.tutorial_complete - end + def id + object._id.to_s end -=end end diff --git a/app/serializers/subject_set_serializer.rb b/app/serializers/subject_set_serializer.rb index e55db548..ddbb1528 100644 --- a/app/serializers/subject_set_serializer.rb +++ b/app/serializers/subject_set_serializer.rb @@ -31,7 +31,7 @@ def selected_subject_id end def subjects_count - object.subjects.count + object.subjects.root.count end =begin diff --git a/app/serializers/workflow_serializer.rb b/app/serializers/workflow_serializer.rb index 6627c38f..dd842a0f 100644 --- a/app/serializers/workflow_serializer.rb +++ b/app/serializers/workflow_serializer.rb @@ -1,5 +1,5 @@ class WorkflowSerializer < ActiveModel::MongoidSerializer - attributes :id, :name, :label, :tasks, :retire_limit, :subject_fetch_limit, :first_task, :active_subjects, :generates_subjects_for, :order + attributes :id, :name, :label, :tasks, :retire_limit, :subject_fetch_limit, :first_task, :active_subjects, :generates_subjects_for, :order, :subjects_editable def id object._id.to_s diff --git a/app/views/admin/dashboard/index.html.erb b/app/views/admin/dashboard/index.html.erb index 0b434ca4..5944a649 100644 --- a/app/views/admin/dashboard/index.html.erb +++ b/app/views/admin/dashboard/index.html.erb @@ -106,7 +106,7 @@
-

Verify

+

Transcriptions Being Verified

0 Total @@ -117,11 +117,13 @@
Active:
-
Verify subjects actively being voted upon
+
Transcriptions actively being voted upon
Inactive:
-
Verify subjects waiting for one or more transcriptions before being activated
+
Transcriptions awaiting one or more additional transcriptions before being voted upon
Complete:
-
Verify subjects that have received sufficient votes to choose one best transcription
+
Transcriptions that skipped voting because transcriptions were identical
+
Retired:
+
Transcriptions taken out of voting because voting has ended
diff --git a/app/views/admin/data/index.html.erb b/app/views/admin/data/index.html.erb index 119376c1..41b03d76 100644 --- a/app/views/admin/data/index.html.erb +++ b/app/views/admin/data/index.html.erb @@ -1,36 +1,32 @@

Data

-
- -

<%= @num_complete %> complete subject(s) ready for export (<%= @num_non_root %> pending).

- -

Format

-<% { "JSON" => 'json', - "CSV" => 'csv' - }.each do |(label, key)| - input_id = "download_format_#{key}" -%> -
- /> - -
-<% end %> +<% if @export.nil? %> +

No data exports have yet been built.

+

Please run `rake project:export_final_data` first

-

Completeness

-<% { "Complete (Only crowd-verified subjects)" => 'complete', - "All (All data in a massive json struc)" => 'all' - }.each do |(label, key)| - input_id = "download_status_#{key}" -%> -
- /> - -
-<% end %> +<% else %> +

Most recent data export: + +

+
Items
+
<%= @export.num_final_subject_sets %>
+ +
Built
+
<%= @export.updated_at.strftime('%B %-d, %Y') %> +
-

Download

+ Download - +

Make Public?

-
+

Should the public be able to download the latest from /data/latest and subscribe to the data updates ATOM feed?

+ +
+ <%= check_box 'project', "downloadable_data" %> + <%= label 'project','downloadable_data', 'Allow the public to download data' %> + +

+
+ +<% end %> diff --git a/app/views/admin/subjects/show.html.erb b/app/views/admin/subjects/show.html.erb index b5c12be1..a21c9eb2 100644 --- a/app/views/admin/subjects/show.html.erb +++ b/app/views/admin/subjects/show.html.erb @@ -16,8 +16,8 @@
Status
<%= @subject.status %> <% if ! @subject.parent_subject.nil? %> - <% if @subject.parent_subject.workflow.generates_subjects_method == 'collect-unique' %> -
(Has <%= @subject.parent_classifications.count %> classifications. <%= @subject.parent_subject.workflow.generates_subjects_after %> total required to activate.) + <% if @subject.status == 'inactive' && @subject.parent_subject.workflow.generates_subjects_method == 'collect-unique' %> +
(Has <%= @subject.parent_classifications.count %> parent classifications. <%= @subject.parent_subject.workflow.generates_subjects_after %> total required to activate.) <% end %> <% if @subject.parent_subject.workflow.generates_subjects_method == 'most-popular' %>
(<%= @subject.parent_classifications.count %> parent classification(s) have <%= (100 * @subject.calculate_most_popular_parent_classification[:percentage]).to_i %>% agreement. At least <%= @subject.parent_subject.workflow.generates_subjects_after %> parent classifications with <%= (100 * @subject.parent_subject.workflow.generates_subjects_agreement).to_i %>% agreement required to activate.) @@ -27,7 +27,7 @@
(<%= @subject.flagged_bad_count %> "Bad Subject" votes) <% end %> <% if @subject.retire_count %> -
(<%= @subject.retire_count %> Retire votes) +
Votes for retirement: <%= (@subject.percentage_for_retire * 100).round %>% (<%= @subject.retire_count %> of <%= @subject.number_of_completion_assessments %> completion assessments) <% end %>
@@ -68,6 +68,7 @@ — <% else %> <%= render partial: 'partials/data_hash', locals: {hash: @subject.data} %> + <% end %> @@ -104,6 +105,18 @@ + <% if @subject.workflow && @subject.workflow.name == 'verify' %> +

Classifications Breakdown

+
    + <% @subject.parent_and_descendent_classifications_grouped.each do |cl| %> +
  • + <%= (100 * cl[:percentage]).round %>%: <%= cl[:votes] %> votes + <%= render partial: 'partials/data_hash', locals: {hash: cl[:ann] } %> +
  • + <% end %> +
+ <% end %> + <% end %>

Downstream

diff --git a/app/views/final_data_exports/index.atom.builder b/app/views/final_data_exports/index.atom.builder new file mode 100644 index 00000000..2b22ca15 --- /dev/null +++ b/app/views/final_data_exports/index.atom.builder @@ -0,0 +1,11 @@ +atom_feed do |feed| + + feed.title("#{Project.current.title} Data Exports") + feed.updated(@exports[0].created_at) if @exports.length > 0 + + @exports.each do |export| + feed.entry(export) do |entry| + entry.title("#{export.updated_at.strftime('%c')}: #{export.num_final_subject_sets} subjects") + end + end +end diff --git a/config/initializers/register_project_static_routes.rb b/config/initializers/register_project_static_routes.rb index 05cb4a14..9c61b0ad 100644 --- a/config/initializers/register_project_static_routes.rb +++ b/config/initializers/register_project_static_routes.rb @@ -1,7 +1,12 @@ API::Application.configure do - if Project.current - project_assets_path = "./project/#{Project.current.key}/assets" - puts "Routing static assets from #{project_assets_path}" - Rails.application.config.middleware.insert_after ActionDispatch::Static, ActionDispatch::Static, project_assets_path + begin + if Project.current + project_assets_path = "./project/#{Project.current.key}/assets" + puts "Routing static assets from #{project_assets_path}" + Rails.application.config.middleware.insert_after ActionDispatch::Static, ActionDispatch::Static, project_assets_path + end + rescue => e + # Best-effort: report why registration failed instead of aborting this configure block + puts "FAILED to register static routing: #{e.class}: #{e.message}" end end diff --git a/config/routes.rb b/config/routes.rb index b6b9f778..bb9148bc 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -35,10 +35,16 @@ resources :groups, only: [:show, :index], :defaults => { :format => 'json' } + # Final data: + resources :final_subject_sets, only: [:show, :index], :defaults => { :format => 'json' } + get '/data/latest', to: 'final_data_exports#latest' + resources :final_data_exports, only: [:show, :index], path: "/data" + namespace :admin do resources :subject_sets, :subjects, 
:classifications, :users get 'dashboard' => 'dashboard#index' get 'data' => 'data#index' + post 'data' => 'data#index' get 'data/download' => 'data#download' get 'signin' => 'auth#signin' post 'stats/recalculate' => 'dashboard#recalculate_stats' diff --git a/lib/tasks/bot.rake b/lib/tasks/bot.rake new file mode 100644 index 00000000..14870e3c --- /dev/null +++ b/lib/tasks/bot.rake @@ -0,0 +1,51 @@ +require 'fileutils' + +namespace :bot do + + desc "Create Bot with name, printing out token to use in HTTP_BOT_AUTH" + task :create, [:name] => :environment do |task, args| + args.with_defaults name: 'ScribeBot' + + ret = BotUser.create args[:name] + + if ! ret[:token].blank? + puts "Created #{ret[:user].name}. Use HTTP header to authenticate:" + puts " #{BotUser::AUTH_HEADER}=#{BotUser::pack_auth_header(ret[:user].id, ret[:token])}" + else + puts "#{ret[:user].name} already exists, so token can not be read but may be reset. Use bot:reset to reset token." + end + end + + desc "Reset Bot token with name, printing out token to use in HTTP_BOT_AUTH" + task :reset, [:name] => :environment do |task, args| + args.with_defaults name: 'ScribeBot' + + # The lookup may come back empty for an unknown name (the delete task guards the same case) + user = BotUser.find_by name: args[:name] + token = user.reset_token! if user + + if token + puts "Reset #{user.name}. Use HTTP header to authenticate:" + puts " #{BotUser::AUTH_HEADER}=#{BotUser::pack_auth_header(user.id, token)}" + else + puts "Bot user #{args[:name]} could not be found or its token could not be reset" + end + end + + desc "Delete Bot by name" + task :delete, [:name] => :environment do |task, args| + if args[:name].blank? + puts "No name given. Aborting." 
+ exit + end + + user = BotUser.find_by name: args[:name] + if user + user.destroy + puts "Removed #{user.name}" + else + puts "Bot user #{args[:name]} could not be found" + end + end + +end diff --git a/lib/tasks/project.rake b/lib/tasks/project.rake index 6bf1f6ed..6563b957 100644 --- a/lib/tasks/project.rake +++ b/lib/tasks/project.rake @@ -126,25 +126,10 @@ namespace :project do # load project_file_path project = Project.find_or_create_by key: project_key - # Establish some defaults so that if they're not set in the project hash, we overwrite the old value with the null default - project_defaults = { - background: nil, - logo: nil, - favicon: nil, - terms_map: {}, - team_emails: [], - team: [], - organizations: [], - analytics: nil, - forum: nil, - menus: {}, - partials: {} - } # Set all valid fields from hash: - project_hash = project_hash.inject(project_defaults) { |h, (k,v)| h[k] = v if Project.fields.keys.include?(k.to_s); h } + project_hash = project_hash.inject({}) { |h, (k,v)| h[k] = v if Project.fields.keys.include?(k.to_s); h } project.update project_hash - puts "Created project: #{project.title}" # Load pages from content/*: content_path = Rails.root.join('project', project_key, 'content') @@ -400,7 +385,93 @@ namespace :project do end + task :build_final_data, [:project_key, :rebuild] => :environment do |task, args| + args.with_defaults rebuild: true + rebuild = args[:rebuild] != 'false' + project = project_by_key args[:project_key] + + start = Time.now + count = project.subject_sets.count + limit = 100 + built = 0 + + # Rebuild indexes for the project being built: + FinalSubjectSet.rebuild_indexes project + + (0..count).step(limit).each do |offset| + project.subject_sets.offset(offset).limit(limit).each_with_index do |set, i| + + FinalSubjectSet.assert_for_set set, rebuild + built += 1 # counted before the estimate so per_set never divides by zero + + ellapsed = Time.now - start + per_set = ellapsed / built + remaining = per_set * (count - (offset + i+1)) / 60 / 60 + complete = (offset + i+1).to_f / count * 100 + # puts "Est 
time remaining: #{ellapsed} (#{per_set}) #{remaining}h" + $stderr.print "\r#{'%.8f' % complete}% complete. #{'%.1f' % remaining}h remaining. Built #{offset +i+1} of #{count}" + end + end + + end + + task :export_final_data, [:project_key] => :environment do |task, args| + project = project_by_key args[:project_key] + + # Make sure user has run build_final_data first: + if project.final_subject_sets.empty? + puts "No FinalSubjectSets found. Invoking project:build_final_data" + Rake::Task['project:build_final_data'].invoke(args[:project_key]) + puts "----------------" + end + + export_base = "tmp/export/#{project.key}" + + # Remove previous export, then recreate the directory (including any missing parents): + FileUtils.rm_rf export_base + + FileUtils.mkdir_p export_base + + start = Time.now + built = 0 + limit = 10 # 100 + count = project.final_subject_sets.count + + (0..count).step(limit).each do |offset| + project.final_subject_sets.offset(offset).limit(limit).each_with_index do |set, i| + path = "#{export_base}/#{set.subject_set_id}.json" + content = FinalSubjectSetSerializer.new(set, root:false).to_json + # puts "content: #{content}" + File.open path, "w" do |f| + f << content + end + built += 1 + + # puts "Wrote #{i+1} of #{count}: #{content.size}b to #{path}" + ellapsed = Time.now - start + per_set = ellapsed / built + remaining = per_set * (count - (offset + i+1)) / 60 / 60 + complete = (offset + i+1).to_f / count * 100 + # puts "Est time remaining: #{ellapsed} (#{per_set}) #{remaining}h" + $stderr.print "\r#{'%.8f' % complete}% complete. #{'%.1f' % remaining}h remaining. 
Built #{offset +i+1} of #{count}" + end + end + + # Generate timestamped filename with random suffix so it can't be guessed: + rand_suffix = (('a'..'z').to_a + (0..9).to_a).shuffle[0,16].join + max_updated = project.final_subject_sets.max(:updated_at) + filename = "scribe-#{project.key}-#{max_updated.strftime("%F")}-#{rand_suffix}" + + path = "/#{filename}.zip" + puts "Zipping #{path}" + `zip --junk-paths -r public#{path} #{export_base}` + puts "Finished building exports. Download at: /#{filename}.zip" + + FinalDataExport.create path: path, num_final_subject_sets: count + + puts "Done." + end def translate_pick_one_tool_config(task_hash) @@ -487,5 +558,10 @@ namespace :project do end end + def project_by_key(key, default=Project.current) + p = Project.find_by key: key + p = default if ! p + p + end end diff --git a/project/emigrant/assets/images/left_to_mark.mp4 b/project/emigrant/assets/images/left_to_mark.mp4 new file mode 100644 index 00000000..cf669ea8 Binary files /dev/null and b/project/emigrant/assets/images/left_to_mark.mp4 differ diff --git a/project/emigrant/assets/images/m_date_5.png b/project/emigrant/assets/images/m_date_5.png new file mode 100644 index 00000000..aaf0a210 Binary files /dev/null and b/project/emigrant/assets/images/m_date_5.png differ diff --git a/project/emigrant/assets/images/m_loan_5.png b/project/emigrant/assets/images/m_loan_5.png new file mode 100644 index 00000000..8aab3c50 Binary files /dev/null and b/project/emigrant/assets/images/m_loan_5.png differ diff --git a/project/emigrant/assets/images/mark_amount_loaned.mp4 b/project/emigrant/assets/images/mark_amount_loaned.mp4 new file mode 100644 index 00000000..9169c4bf Binary files /dev/null and b/project/emigrant/assets/images/mark_amount_loaned.mp4 differ diff --git a/project/emigrant/bot-example.rb b/project/emigrant/bot-example.rb new file mode 100644 index 00000000..dcdd1451 --- /dev/null +++ b/project/emigrant/bot-example.rb @@ -0,0 +1,151 @@ + +require 'open-uri' +require 
'json' +require 'cgi' + +# Useful extension to Hash to create query strings (note: keys and values are NOT URL-escaped): +class Hash + def to_params + params = '' + stack = [] + + each do |k, v| + if v.is_a?(Hash) + stack << [k,v] + elsif v.is_a?(Array) + stack << [k,Hash.from_array(v)] + else + params << "#{k}=#{v}&" + end + end + + stack.each do |parent, hash| + hash.each do |k, v| + if v.is_a?(Hash) + stack << ["#{parent}[#{k}]", v] + else + params << "#{parent}[#{k}]=#{v}&" + end + end + end + + params.chop! + params + end + + def self.from_array(array = []) + h = Hash.new + array.size.times do |t| + h[t] = array[t] + end + h + end + +end + +# Example Scribe bot class: +class ScribeBot + + def initialize(scribe_endpoint) + @classifications_endpoint = scribe_endpoint + end + + # Post classification for a known subject_id + def classify_subject_by_id(subject_id, workflow_name, task_key, data) + params = { + workflow: { + name: workflow_name + }, + classifications: { + annotation: data, + task_key: task_key, + subject_id: subject_id + } + } + + submit_classification params + end + + # Post classification for subject specified by URL: + def classify_subject_by_url(subject_url, workflow_name, task_key, data) + params = { + subject: { + location: { + standard: CGI::escape(subject_url) + } + }, + workflow: { + name: workflow_name + }, + classifications: { + annotation: data, + task_key: task_key + } + } + + submit_classification params + end + + # Posts params as-is to classifications endpoint: + def submit_classification(params) + + require 'uri' + require "net/http" + + uri = URI(@classifications_endpoint) + + req = Net::HTTP::Post.new(uri.path, {'BOT_AUTH' => ENV['SCRIBE_BOT_TOKEN']}) + req.body = params.to_params + http = Net::HTTP.new(uri.host, uri.port) + + response = http.start {|http| http.request(req) } + + begin + JSON.parse response.body + rescue + nil + end + end +end + +# This simple script demonstrates use of the Scribe Classifications endpoint to generate data +# +# Usage: +# ruby bot-example.rb 
[-scribe-endpoint="http://localhost:3000/classifications"] +# + +options = Hash[ ARGV.join(' ').scan(/--?([^=\s]+)(?:=(\S+))?/) ] +options["scribe-endpoint"] = "http://localhost:3000/classifications" if ! options["scribe-endpoint"] + +args = ARGV.select { |a| ! a.match /^-/ } + +bot = ScribeBot.new options["scribe-endpoint"] + +# The following generates two classifications: One mark classification +# and one transcription classification (applied to the subject generated by the +# mark classification). + +# Specify subject by standard URL (since this is a bot classification, it will be created automatically if it doesn't exist) +image_uri = "https://s3.amazonaws.com/scribe.nypl.org/emigrant-s4/full/619aed10-23fd-0133-16de-58d385a7bbd0.right-bottom.jpg" + +# Must manually specify workflow name ('mark'), and task_key ('mark_primary') +classification = bot.classify_subject_by_url( image_uri, "mark", "mark_primary", { + x: 100, + y: 200, + width: 300, + height: 200, + subToolIndex: 0 # Must specify subToolIndex (integer index into the tools array configured for workflow task) +})['classification'] + +# Response should contain a classification with a nested child_subject: +puts "Created classification: #{classification.to_json}" + +# Assuming above was successful, use the returned, generated subject_id to create next classification: +mark_id = classification['child_subject']['id'] +# Subjects generated in Mark tend to have `type`s that correspond to Transcribe task keys: +transcribe_task_key = classification['child_subject']['type'] +# Create transcription classification: +classification = bot.classify_subject_by_id( mark_id, "transcribe", transcribe_task_key, { value: 'foo' }) + +# Response should contain a classification with a nested verify subject (or orphaned subject if there is no Verify workflow) +puts "Created transcription classification: #{classification.to_json}" diff --git a/project/emigrant/content/help/learn_marking.md 
b/project/emigrant/content/help/learn_marking.md index 3b3c2e31..de90c79a 100644 --- a/project/emigrant/content/help/learn_marking.md +++ b/project/emigrant/content/help/learn_marking.md @@ -1,13 +1,11 @@ #Hi! - + Your task is to identify fields in a series of bank records. You'll be working with digitized pages from the Bank's record books. Typically, each page has two records. Sometimes, one or both of the records are obscured by another image or an insert on the page. If there's an issue preventing you from reading the records or if there is no information recorded on the page, select the "Bad Page" button in the right hand pane. Use the menu on the right to select a field (e.g. "Record Date"). Then, use your mouse to draw a box on the image around each place that the field appears. As an aid, the fields are color coded. All the boxes for that field will appear in the same color. For additional, field specific guidance select the help buttons next to each field label. You may see existing rectangles on the page. If this is the case, someone has already marked the field and there's no need to mark an area twice. -You can manipulate the digitized pages with tools on the left hand side of the screen. Here you'll find the zoom tool and a mark toggle. +You can manipulate the digitized pages with tools on the left hand side of the screen. Here you'll find the zoom tool and a mark toggle. The marks created in this task identify the fields to be transcribed in the next task phase. - - diff --git a/project/emigrant/content/help/learn_transcribing.md b/project/emigrant/content/help/learn_transcribing.md index 266d388d..ab5e98ed 100644 --- a/project/emigrant/content/help/learn_transcribing.md +++ b/project/emigrant/content/help/learn_transcribing.md @@ -1,5 +1,5 @@ #Transcribe - -Transcribe the text exactly as you see it. For additional help and sample transcriptions, each transcription prompt has an "Need some help" button providing field specific context. 
+ +Transcribe the text exactly as you see it. For additional help and sample transcriptions, each transcription prompt has a "Need some help" button providing field-specific context. If the text is hard to read, transcribe as best you can. These transcriptions will be sorted through in the Verify stage. diff --git a/project/emigrant/content/help/learn_verifying.md b/project/emigrant/content/help/learn_verifying.md index 81507d55..ff29a55d 100644 --- a/project/emigrant/content/help/learn_verifying.md +++ b/project/emigrant/content/help/learn_verifying.md @@ -1,5 +1,5 @@ #Verify - + In this task, you'll perform quality control on the transcriptions. Select the best transcription from those provided. If none appear correct, you may choose to enter your own. Once a consensus has been achieved, we'll accept the transcription as accurate and add the value to the index. diff --git a/project/emigrant/content/help/m_record_amount_loaned.md b/project/emigrant/content/help/m_record_amount_loaned.md index 36a90fe0..eff44a91 100644 --- a/project/emigrant/content/help/m_record_amount_loaned.md +++ b/project/emigrant/content/help/m_record_amount_loaned.md @@ -1,5 +1,5 @@ # Amount Loaned -

The amount loaned is typically found at or near the bottom of the record.

+

The amount loaned is typically found at or near the bottom of the record. If a record notes multiple amounts loaned, mark each individually.

Sample Image One