|
| 1 | +require 'icalendar' |
| 2 | +require 'nokogiri' |
| 3 | +require 'open-uri' |
| 4 | +require 'tzinfo' |
| 5 | + |
module Ingestors
  module Heptraining
    # Ingestor that reads an iCalendar feed of Gray Scott events and enriches
    # each calendar entry with details scraped from the event's web page.
    #
    # `open_url`, `get_html_from_url` and `@events` are not defined in this
    # file; presumably they are provided by the `Ingestor` base class.
    class GrayScottIngestor < Ingestor
      # Static description of this ingestor.
      #
      # @return [Hash] the key, title, category and user agent of the ingestor
      def self.config
        {
          key: 'gray_scott_event',
          title: 'Gray Scott Events API',
          category: :events,
          user_agent: 'TeSS Gray Scott ingestor'
        }
      end

      # Entry point: ingests every event of the calendar located at +url+.
      #
      # @param url [String] location of the iCalendar feed
      def read(url)
        @verbose = false
        process_gray_scott(url)
      end

      private

      # Downloads and parses the calendar, then processes each of its events.
      #
      # @param url [String] location of the iCalendar feed
      # @raise [RuntimeError] 'Not found' when the feed yields no event
      def process_gray_scott(url)
        events = Icalendar::Event.parse(open_url(url, raise: true).set_encoding('utf-8'))
        raise 'Not found' if events.nil? || events.empty?

        events.each { |calevent| process_calevent(calevent, url) }
      end

      # Builds one event from a calendar entry and appends it to +@events+.
      #
      # The description and the speakers are scraped from the event page; the
      # calendar's own description is used when the page provides none.
      #
      # @param calevent [Icalendar::Event] the calendar entry to convert
      # @param _url [String] the feed URL (currently unused)
      def process_calevent(calevent, _url)
        gs_url = extract_event_url(calevent)
        # Without a link there is no page to scrape: keep the calendar data only
        # instead of crashing in URI.parse(nil).
        # NOTE(review): whether an event with a nil URL passes later validation
        # is not visible from this file - confirm.
        html = gs_url && get_html_from_url(get_gray_scott_redirection(gs_url))

        event = OpenStruct.new
        event.title = calevent.summary.to_s
        event.url = gs_url
        html_description = html ? html.css('.paragraphStyle').text.to_s.strip : ''
        event.description = html_description.empty? ? calevent.description.to_s : html_description

        event.end = calevent.dtend&.to_time&.utc
        dtstart = calevent.dtstart
        if dtstart
          event.start = dtstart.to_time&.utc
          tzid = dtstart.ical_params['tzid']
          event.timezone = tzid.first.to_s if tzid && !tzid.empty?
        end
        event.venue = clean_html(calevent.location.to_s)

        # Comma-separated when the page lists several speakers.
        speakers = html ? html.css('h3:contains("Speakers") + ul li a').map { |link| link.text.strip } : []
        event.organizer = speakers.join(', ')

        @events << event
      end

      # Extracts the event page URL from the calendar entry.
      #
      # The link is read from the first custom property whose name contains
      # 'http'; its first value is stripped of surrounding slashes/whitespace
      # and prefixed with 'https://'.
      #
      # @param calevent [Icalendar::Event] the calendar entry
      # @return [String, nil] the event URL, or nil when no such property exists
      def extract_event_url(calevent)
        _name, values = calevent.custom_properties.find { |key, _| key.include?('http') }
        raw = values&.first
        return if raw.nil?

        "https://#{raw.strip.gsub(%r{^[/\s]+|[/\s]+$}, '')}"
      end

      # Resolves the page an event link finally points to.
      #
      # The page at +url+ may embed a script defining `var dictReference`, an
      # object that maps the value of the `label` query parameter to a page
      # name located next to the requested one.
      #
      # @param url [String] the event link taken from the calendar
      # @return [String] the resolved URL, or +url+ itself when there is no
      #   label, no dictionary, or no entry for the label
      # @raise [JSON::ParserError] when the dictionary is not valid JSON
      def get_gray_scott_redirection(url)
        uri = URI.parse(url)
        # uri.query is nil for a URL without a query string; CGI.parse needs a String.
        label = CGI.parse(uri.query.to_s)['label']&.first
        # JSON object keys are strings, so a missing label can never match:
        # skip the extra page fetch.
        return url if label.nil?

        script = get_html_from_url(url).css('script').find { |s| s.content.include?('var dictReference') }
        dict_match = script&.content&.match(/var\s+dictReference\s*=\s*({[^}]+})/)
        # Fall back to the original link (not nil) so the caller always gets a
        # URL it can fetch.
        return url unless dict_match

        target = JSON.parse(dict_match[1])[label]
        return url unless target

        # Replace the last path segment with the mapped page name.
        "#{uri.scheme}://#{uri.host}#{uri.path.sub(%r{/[^/]+$}, '')}/#{target}"
      end

      # Strips markup from an HTML fragment.
      #
      # @param html [String] HTML fragment
      # @return [String] the fragment's text content without surrounding whitespace
      def clean_html(html)
        Nokogiri::HTML::DocumentFragment.parse(html).text.strip
      end
    end
  end
end
0 commit comments