onebox 2.2.14 → 2.2.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/lib/onebox/engine.rb +6 -10
  3. data/lib/onebox/engine/allowlisted_generic_onebox.rb +0 -9
  4. data/lib/onebox/engine/amazon_onebox.rb +23 -16
  5. data/lib/onebox/engine/flickr_onebox.rb +2 -2
  6. data/lib/onebox/engine/gfycat_onebox.rb +26 -26
  7. data/lib/onebox/engine/github_commit_onebox.rb +1 -1
  8. data/lib/onebox/engine/github_folder_onebox.rb +1 -1
  9. data/lib/onebox/engine/google_docs_onebox.rb +22 -40
  10. data/lib/onebox/engine/google_maps_onebox.rb +10 -6
  11. data/lib/onebox/engine/google_photos_onebox.rb +6 -6
  12. data/lib/onebox/engine/imgur_onebox.rb +2 -2
  13. data/lib/onebox/engine/instagram_onebox.rb +2 -3
  14. data/lib/onebox/engine/pastebin_onebox.rb +11 -15
  15. data/lib/onebox/engine/pdf_onebox.rb +7 -15
  16. data/lib/onebox/engine/pubmed_onebox.rb +16 -12
  17. data/lib/onebox/engine/stack_exchange_onebox.rb +1 -1
  18. data/lib/onebox/engine/standard_embed.rb +0 -3
  19. data/lib/onebox/engine/trello_onebox.rb +3 -6
  20. data/lib/onebox/engine/youku_onebox.rb +0 -6
  21. data/lib/onebox/helpers.rb +2 -1
  22. data/lib/onebox/layout.rb +2 -14
  23. data/lib/onebox/matcher.rb +10 -8
  24. data/lib/onebox/mixins/git_blob_onebox.rb +3 -5
  25. data/lib/onebox/open_graph.rb +4 -4
  26. data/lib/onebox/preview.rb +2 -2
  27. data/lib/onebox/version.rb +1 -1
  28. data/templates/_layout.mustache +6 -2
  29. data/templates/allowlistedgeneric.mustache +8 -9
  30. data/templates/amazon.mustache +5 -2
  31. data/templates/githubblob.mustache +44 -34
  32. data/templates/githubcommit.mustache +1 -3
  33. data/templates/githubfolder.mustache +2 -2
  34. data/templates/githubgist.mustache +9 -6
  35. data/templates/githubissue.mustache +3 -3
  36. data/templates/githubpullrequest.mustache +1 -1
  37. data/templates/gitlabblob.mustache +11 -4
  38. data/templates/googledocs.mustache +2 -2
  39. data/templates/googledrive.mustache +2 -2
  40. data/templates/googleplayapp.mustache +2 -1
  41. data/templates/instagram.mustache +1 -1
  42. data/templates/pastebin.mustache +6 -2
  43. data/templates/pdf.mustache +6 -3
  44. data/templates/stackexchange.mustache +1 -0
  45. data/templates/twitterstatus.mustache +20 -5
  46. data/templates/wikimedia.mustache +2 -2
  47. data/templates/wikipedia.mustache +2 -2
  48. data/templates/xkcd.mustache +2 -2
  49. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0b9626bc9bec1d423e617a946df2e4be0334cf6f7693056dc79865b50f127e26
4
- data.tar.gz: 9fc7c684bc99a33d356cfbf0c1a085e5dc1531aa6a6a3028670afb05406c4448
3
+ metadata.gz: 5e2169ef1cfb44f0208566f5ecff7bd7c5d2bbe2f12b58791f50aa24b776e56d
4
+ data.tar.gz: 9f9d38b578e46e6ce8c8c007577ff02522eab24f3dcc42f9eabd5b838a668e72
5
5
  SHA512:
6
- metadata.gz: 2a3f999936fbfe289e07b58b424e735d13c2e3132beffb16c41a6094fa42aa0dd1018d544175eba1f1ad7552b26cade7096d72ff286268a097230835d8f9c99d
7
- data.tar.gz: 1effdda4c94dc9dbded959fc3acf0a51d6efd09629cccd2e9c2c1df74d637c9b8cb577be7fb79dc90b3ba42125af6dfde0c56e33cb8b31f6d4a72d0090a0fded
6
+ metadata.gz: e293af57162b61ad0fa3472b7d9b35709f13504308e69eba45f40e9babff53e0258186add06ed4a89bc8305b8903dc49718c274659b5fca0712bc26d281ca7d4
7
+ data.tar.gz: 3bb70c552e010149bc32fd4c9dbe416cd6ae0098b4286e5894fbc12c07628b5079c0463ad78b54ab6dc773954e055c9195aea5ac180ab8bdf7dca3847db1bc96
data/lib/onebox/engine.rb CHANGED
@@ -28,23 +28,19 @@ module Onebox
28
28
  end
29
29
  end
30
30
 
31
- attr_reader :url, :uri
32
- attr_reader :timeout
31
+ attr_reader :url, :uri, :options, :timeout
33
32
  attr :errors
34
33
 
35
34
  DEFAULT = {}
36
- def options
37
- @options
38
- end
39
35
 
40
36
  def options=(opt)
41
- return @options if opt.nil? #make sure options provided
42
- opt = opt.to_h if opt.instance_of?(OpenStruct)
37
+ return @options if opt.nil? # make sure options provided
38
+ opt = opt.to_h if opt.instance_of?(OpenStruct)
43
39
  @options.merge!(opt)
44
40
  @options
45
41
  end
46
42
 
47
- def initialize(link, timeout = nil)
43
+ def initialize(url, timeout = nil)
48
44
  @errors = {}
49
45
  @options = DEFAULT
50
46
  class_name = self.class.name.split("::").last.to_s
@@ -52,8 +48,8 @@ module Onebox
52
48
  # Set the engine options extracted from global options.
53
49
  self.options = Onebox.options[class_name] || {}
54
50
 
55
- @url = link
56
- @uri = URI(link)
51
+ @url = url
52
+ @uri = URI(url)
57
53
  if always_https?
58
54
  @uri.scheme = 'https'
59
55
  @url = @uri.to_s
data/lib/onebox/engine/allowlisted_generic_onebox.rb CHANGED
@@ -27,7 +27,6 @@ module Onebox
27
27
  500px.com
28
28
  8tracks.com
29
29
  abc.net.au
30
- about.com
31
30
  answers.com
32
31
  arstechnica.com
33
32
  ask.com
@@ -36,11 +35,9 @@ module Onebox
36
35
  bbs.boingboing.net
37
36
  bestbuy.ca
38
37
  bestbuy.com
39
- blip.tv
40
38
  bloomberg.com
41
39
  businessinsider.com
42
40
  change.org
43
- clikthrough.com
44
41
  cnet.com
45
42
  cnn.com
46
43
  codepen.io
@@ -90,7 +87,6 @@ module Onebox
90
87
  meetup.com
91
88
  mixcloud.com
92
89
  mlb.com
93
- myshopify.com
94
90
  myspace.com
95
91
  nba.com
96
92
  npr.org
@@ -98,16 +94,13 @@ module Onebox
98
94
  photobucket.com
99
95
  pinterest.com
100
96
  reference.com
101
- revision3.com
102
97
  rottentomatoes.com
103
98
  samsung.com
104
- screenr.com
105
99
  scribd.com
106
100
  slideshare.net
107
101
  sourceforge.net
108
102
  speakerdeck.com
109
103
  spotify.com
110
- squidoo.com
111
104
  streamable.com
112
105
  techcrunch.com
113
106
  ted.com
@@ -124,7 +117,6 @@ module Onebox
124
117
  twitpic.com
125
118
  usatoday.com
126
119
  viddler.com
127
- videojug.com
128
120
  vine.co
129
121
  walmart.com
130
122
  washingtonpost.com
@@ -275,7 +267,6 @@ module Onebox
275
267
 
276
268
  def rewrite_https(html)
277
269
  return unless html
278
- uri = URI(@url)
279
270
  if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
280
271
  html = html.gsub("http://", "https://")
281
272
  end
data/lib/onebox/engine/amazon_onebox.rb CHANGED
@@ -19,10 +19,8 @@ module Onebox
19
19
  # If possible, fetch the cached HTML body immediately so we can
20
20
  # try to grab the canonical URL from that document,
21
21
  # rather than guess at the best URL structure to use
22
- if body_cacher&.respond_to?('cache_response_body?')
23
- if body_cacher.cache_response_body?(uri.to_s) && body_cacher.cached_response_body_exists?(uri.to_s)
24
- @raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
25
- end
22
+ if !@raw && has_cached_body
23
+ @raw = Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
26
24
  end
27
25
 
28
26
  if @raw
@@ -31,7 +29,8 @@ module Onebox
31
29
  end
32
30
 
33
31
  if match && match[:id]
34
- return "https://www.amazon.#{tld}/dp/#{Onebox::Helpers.uri_encode(match[:id])}"
32
+ id = Addressable::URI.encode_component(match[:id], Addressable::URI::CharacterClasses::PATH)
33
+ return "https://www.amazon.#{tld}/dp/#{id}"
35
34
  end
36
35
 
37
36
  @url
@@ -49,6 +48,12 @@ module Onebox
49
48
 
50
49
  private
51
50
 
51
+ def has_cached_body
52
+ body_cacher&.respond_to?('cache_response_body?') &&
53
+ body_cacher.cache_response_body?(uri.to_s) &&
54
+ body_cacher.cached_response_body_exists?(uri.to_s)
55
+ end
56
+
52
57
  def match
53
58
  @match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
54
59
  end
@@ -57,9 +62,9 @@ module Onebox
57
62
  if (main_image = raw.css("#main-image")) && main_image.any?
58
63
  attributes = main_image.first.attributes
59
64
 
60
- return attributes["data-a-hires"].to_s if attributes["data-a-hires"]
61
-
62
- if attributes["data-a-dynamic-image"]
65
+ if attributes["data-a-hires"]
66
+ return attributes["data-a-hires"].to_s
67
+ elsif attributes["data-a-dynamic-image"]
63
68
  return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
64
69
  end
65
70
  end
@@ -67,9 +72,11 @@ module Onebox
67
72
  if (landing_image = raw.css("#landingImage")) && landing_image.any?
68
73
  attributes = landing_image.first.attributes
69
74
 
70
- return attributes["data-old-hires"].to_s if attributes["data-old-hires"]
71
-
72
- landing_image.first["src"].to_s
75
+ if attributes["data-old-hires"]
76
+ return attributes["data-old-hires"].to_s
77
+ else
78
+ return landing_image.first["src"].to_s
79
+ end
73
80
  end
74
81
 
75
82
  if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any?
@@ -91,16 +98,16 @@ module Onebox
91
98
  end
92
99
 
93
100
  def multiple_authors(authors_xpath)
94
- author_list = raw.xpath(authors_xpath)
95
- authors = []
96
- author_list.each { |a| authors << a.inner_text.strip }
97
- authors.join(", ")
101
+ raw
102
+ .xpath(authors_xpath)
103
+ .map { |a| a.inner_text.strip }
104
+ .join(", ")
98
105
  end
99
106
 
100
107
  def data
101
108
  og = ::Onebox::OpenGraph.new(raw)
102
109
 
103
- if raw.at_css('#dp.book_mobile') #printed books
110
+ if raw.at_css('#dp.book_mobile') # printed books
104
111
  title = raw.at("h1#title")&.inner_text
105
112
  authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
106
113
  rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text
data/lib/onebox/engine/flickr_onebox.rb CHANGED
@@ -32,7 +32,7 @@ module Onebox
32
32
  <span class='album-title'>#{album_title}</span>
33
33
  </span>
34
34
  </span>
35
- <img src='#{og.get_secure_image}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
35
+ <img src='#{og.secure_image_url}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
36
36
  </a>
37
37
  </div>
38
38
  HTML
@@ -43,7 +43,7 @@ module Onebox
43
43
 
44
44
  <<-HTML
45
45
  <a href='#{escaped_url}' target='_blank' rel='noopener' class="onebox">
46
- <img src='#{og.get_secure_image}' #{og.title_attr} alt='Imgur' height='#{og.image_height}' width='#{og.image_width}'>
46
+ <img src='#{og.secure_image_url}' #{og.title_attr} alt='Imgur' height='#{og.image_height}' width='#{og.image_width}'>
47
47
  </a>
48
48
  HTML
49
49
  end
data/lib/onebox/engine/gfycat_onebox.rb CHANGED
@@ -9,8 +9,8 @@ module Onebox
9
9
  matches_regexp(/^https?:\/\/gfycat\.com\//)
10
10
  always_https
11
11
 
12
+ # This engine should have priority over AllowlistedGenericOnebox.
12
13
  def self.priority
13
- # This engine should have priority over AllowlistedGenericOnebox.
14
14
  1
15
15
  end
16
16
 
@@ -21,6 +21,7 @@ module Onebox
21
21
  <img src="https://gfycat.com/static/favicons/favicon-96x96.png" class="site-icon" width="64" height="64">
22
22
  <a href="#{data[:url]}" target="_blank" rel="nofollow ugc noopener">Gfycat.com</a>
23
23
  </header>
24
+
24
25
  <article class="onebox-body">
25
26
  <h4>
26
27
  #{data[:title]} by
@@ -36,11 +37,12 @@ module Onebox
36
37
  <img title="Sorry, your browser doesn't support HTML5 video." src="#{data[:posterUrl]}">
37
38
  </video>
38
39
  </div>
40
+
39
41
  <p>
40
42
  <span class="label1">#{data[:keywords]}</span>
41
43
  </p>
42
-
43
44
  </article>
45
+
44
46
  <div style="clear: both"></div>
45
47
  </aside>
46
48
  HTML
@@ -61,52 +63,50 @@ module Onebox
61
63
  @match ||= @url.match(/^https?:\/\/gfycat\.com\/(gifs\/detail\/)?(?<name>.+)/)
62
64
  end
63
65
 
64
- def nokogiri_page
65
- @nokogiri_page ||= begin
66
- response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
67
- Nokogiri::HTML(response)
68
- end
69
- end
66
+ def og_data
67
+ return @og_data if defined?(@og_data)
70
68
 
71
- def get_og_data
72
- og_data = {}
69
+ response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
70
+ page = Nokogiri::HTML(response)
71
+ script = page.at_css('script[type="application/ld+json"]')
73
72
 
74
- if json_string = nokogiri_page.at_css('script[type="application/ld+json"]')&.text
75
- og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
73
+ if json_string = script&.text
74
+ @og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
75
+ else
76
+ @og_data = {}
76
77
  end
77
-
78
- og_data
79
78
  end
80
79
 
81
80
  def data
82
- og_data = get_og_data
81
+ return @data if defined?(@data)
83
82
 
84
- response = {
83
+ @data = {
85
84
  name: match[:name],
86
85
  title: og_data[:headline] || 'No Title',
87
86
  author: og_data[:author],
88
- url: @url
87
+ url: @url,
89
88
  }
90
89
 
91
- keywords = og_data[:keywords]&.split(',')
92
- if keywords
93
- response[:keywords] = keywords.map { |t| "<a href='https://gfycat.com/gifs/search/#{t}'>##{t}</a>" }.join(' ')
90
+ if keywords = og_data[:keywords]&.split(',')
91
+ @data[:keywords] = keywords
92
+ .map { |keyword| "<a href='https://gfycat.com/gifs/search/#{keyword}'>##{keyword}</a>" }
93
+ .join(' ')
94
94
  end
95
95
 
96
96
  if og_data[:video]
97
97
  content_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:contentUrl])
98
98
  video_url = Pathname.new(content_url)
99
- response[:webmUrl] = video_url.sub_ext(".webm").to_s
100
- response[:mp4Url] = video_url.sub_ext(".mp4").to_s
99
+ @data[:webmUrl] = video_url.sub_ext(".webm").to_s
100
+ @data[:mp4Url] = video_url.sub_ext(".mp4").to_s
101
101
 
102
102
  thumbnail_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:thumbnailUrl])
103
- response[:posterUrl] = thumbnail_url
103
+ @data[:posterUrl] = thumbnail_url
104
104
 
105
- response[:width] = og_data[:video][:width]
106
- response[:height] = og_data[:video][:height]
105
+ @data[:width] = og_data[:video][:width]
106
+ @data[:height] = og_data[:video][:height]
107
107
  end
108
108
 
109
- response
109
+ @data
110
110
  end
111
111
  end
112
112
  end
data/lib/onebox/engine/github_commit_onebox.rb CHANGED
@@ -10,7 +10,7 @@ module Onebox
10
10
  include JSON
11
11
  include Onebox::Mixins::GithubBody
12
12
 
13
- matches_regexp Regexp.new("^https?://(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:/)?(?:.)*/commit/")
13
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:\/)?(?:.)*\/commit\//)
14
14
  always_https
15
15
 
16
16
  def url
data/lib/onebox/engine/github_folder_onebox.rb CHANGED
@@ -7,7 +7,7 @@ module Onebox
7
7
  include StandardEmbed
8
8
  include LayoutSupport
9
9
 
10
- matches_regexp Regexp.new(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/)
10
+ matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/)
11
11
  always_https
12
12
 
13
13
  def self.priority
data/lib/onebox/engine/google_docs_onebox.rb CHANGED
@@ -4,61 +4,43 @@ module Onebox
4
4
  module Engine
5
5
  class GoogleDocsOnebox
6
6
  include Engine
7
+ include StandardEmbed
7
8
  include LayoutSupport
8
9
 
9
- def self.supported_endpoints
10
- %w(spreadsheets document forms presentation)
11
- end
12
-
13
- def self.short_types
14
- @shorttypes ||= {
15
- spreadsheets: :sheets,
16
- document: :docs,
17
- presentation: :slides,
18
- forms: :forms,
19
- }
20
- end
10
+ SUPPORTED_ENDPOINTS = %w(spreadsheets document forms presentation)
11
+ SHORT_TYPES = {
12
+ spreadsheets: :sheets,
13
+ document: :docs,
14
+ presentation: :slides,
15
+ forms: :forms,
16
+ }
21
17
 
22
- matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?<endpoint>(#{supported_endpoints.join('|')}))\/d\/((?<key>[\w-]*)).+$/)
18
+ matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?<endpoint>(#{SUPPORTED_ENDPOINTS.join('|')}))\/d\/((?<key>[\w-]*)).+$/)
23
19
  always_https
24
20
 
25
- protected
21
+ private
26
22
 
27
23
  def data
28
- og_data = get_og_data
24
+ og_data = get_opengraph
25
+ short_type = SHORT_TYPES[match[:endpoint].to_sym]
26
+
27
+ description = if Onebox::Helpers.blank?(og_data.description)
28
+ "This #{short_type.to_s.chop.capitalize} is private"
29
+ else
30
+ Onebox::Helpers.truncate(og_data.description, 250)
31
+ end
32
+
29
33
  {
30
34
  link: link,
31
- title: og_data[:title] || "Google #{shorttype.to_s.capitalize}",
32
- description: Onebox::Helpers.truncate(og_data[:description], 250) || "This #{shorttype.to_s.chop.capitalize} is private",
33
- type: shorttype
35
+ title: og_data.title || "Google #{short_type.to_s.capitalize}",
36
+ description: description,
37
+ type: short_type
34
38
  }
35
39
  end
36
40
 
37
- def doc_type
38
- @doc_type ||= match[:endpoint].to_sym
39
- end
40
-
41
- def shorttype
42
- GoogleDocsOnebox.short_types[doc_type]
43
- end
44
-
45
41
  def match
46
42
  @match ||= @url.match(@@matcher)
47
43
  end
48
-
49
- def get_og_data
50
- response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
51
- html = Nokogiri::HTML(response)
52
- og_data = {}
53
- html.css('meta').each do |m|
54
- if m.attribute('property') && m.attribute('property').to_s.match(/^og:/i)
55
- m_content = m.attribute('content').to_s.strip
56
- m_property = m.attribute('property').to_s.gsub('og:', '')
57
- og_data[m_property.to_sym] = m_content
58
- end
59
- end
60
- og_data
61
- end
62
44
  end
63
45
  end
64
46
  end
data/lib/onebox/engine/google_maps_onebox.rb CHANGED
@@ -119,8 +119,6 @@ module Onebox
119
119
  @placeholder = "https://maps.googleapis.com/maps/api/streetview?size=690x400&location=#{lon},#{lat}&pano=#{panoid}&fov=#{zoom}&heading=#{heading}&pitch=#{pitch}&sensor=false"
120
120
 
121
121
  when :canonical
122
- uri = URI(@url)
123
-
124
122
  query = URI::decode_www_form(uri.query).to_h
125
123
  if !query.has_key?("ll")
126
124
  raise ArgumentError, "canonical url lacks location argument" unless query.has_key?("sll")
@@ -163,14 +161,20 @@ module Onebox
163
161
  end
164
162
 
165
163
  def follow_redirect!
166
- uri = URI(@url)
167
164
  begin
168
- http = Net::HTTP.start(uri.host, uri.port,
169
- use_ssl: uri.scheme == 'https', open_timeout: timeout, read_timeout: timeout)
170
- response = http.head(uri.path)
165
+ http = Net::HTTP.start(
166
+ uri.host,
167
+ uri.port,
168
+ use_ssl: uri.scheme == 'https',
169
+ open_timeout: timeout,
170
+ read_timeout: timeout
171
+ )
171
172
 
173
+ response = http.head(uri.path)
172
174
  raise "unexpected response code #{response.code}" unless %w(200 301 302).include?(response.code)
175
+
173
176
  @url = response.code == "200" ? uri.to_s : response["Location"]
177
+ @uri = URI(@url)
174
178
  ensure
175
179
  http.finish rescue nil
176
180
  end
OSZAR »