onebox 2.2.14 → 2.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/onebox/engine.rb +6 -10
- data/lib/onebox/engine/allowlisted_generic_onebox.rb +0 -9
- data/lib/onebox/engine/amazon_onebox.rb +23 -16
- data/lib/onebox/engine/flickr_onebox.rb +2 -2
- data/lib/onebox/engine/gfycat_onebox.rb +26 -26
- data/lib/onebox/engine/github_commit_onebox.rb +1 -1
- data/lib/onebox/engine/github_folder_onebox.rb +1 -1
- data/lib/onebox/engine/google_docs_onebox.rb +22 -40
- data/lib/onebox/engine/google_maps_onebox.rb +10 -6
- data/lib/onebox/engine/google_photos_onebox.rb +6 -6
- data/lib/onebox/engine/imgur_onebox.rb +2 -2
- data/lib/onebox/engine/instagram_onebox.rb +2 -3
- data/lib/onebox/engine/pastebin_onebox.rb +11 -15
- data/lib/onebox/engine/pdf_onebox.rb +7 -15
- data/lib/onebox/engine/pubmed_onebox.rb +16 -12
- data/lib/onebox/engine/stack_exchange_onebox.rb +1 -1
- data/lib/onebox/engine/standard_embed.rb +0 -3
- data/lib/onebox/engine/trello_onebox.rb +3 -6
- data/lib/onebox/engine/youku_onebox.rb +0 -6
- data/lib/onebox/helpers.rb +2 -1
- data/lib/onebox/layout.rb +2 -14
- data/lib/onebox/matcher.rb +10 -8
- data/lib/onebox/mixins/git_blob_onebox.rb +3 -5
- data/lib/onebox/open_graph.rb +4 -4
- data/lib/onebox/preview.rb +2 -2
- data/lib/onebox/version.rb +1 -1
- data/templates/_layout.mustache +6 -2
- data/templates/allowlistedgeneric.mustache +8 -9
- data/templates/amazon.mustache +5 -2
- data/templates/githubblob.mustache +44 -34
- data/templates/githubcommit.mustache +1 -3
- data/templates/githubfolder.mustache +2 -2
- data/templates/githubgist.mustache +9 -6
- data/templates/githubissue.mustache +3 -3
- data/templates/githubpullrequest.mustache +1 -1
- data/templates/gitlabblob.mustache +11 -4
- data/templates/googledocs.mustache +2 -2
- data/templates/googledrive.mustache +2 -2
- data/templates/googleplayapp.mustache +2 -1
- data/templates/instagram.mustache +1 -1
- data/templates/pastebin.mustache +6 -2
- data/templates/pdf.mustache +6 -3
- data/templates/stackexchange.mustache +1 -0
- data/templates/twitterstatus.mustache +20 -5
- data/templates/wikimedia.mustache +2 -2
- data/templates/wikipedia.mustache +2 -2
- data/templates/xkcd.mustache +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e2169ef1cfb44f0208566f5ecff7bd7c5d2bbe2f12b58791f50aa24b776e56d
|
4
|
+
data.tar.gz: 9f9d38b578e46e6ce8c8c007577ff02522eab24f3dcc42f9eabd5b838a668e72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e293af57162b61ad0fa3472b7d9b35709f13504308e69eba45f40e9babff53e0258186add06ed4a89bc8305b8903dc49718c274659b5fca0712bc26d281ca7d4
|
7
|
+
data.tar.gz: 3bb70c552e010149bc32fd4c9dbe416cd6ae0098b4286e5894fbc12c07628b5079c0463ad78b54ab6dc773954e055c9195aea5ac180ab8bdf7dca3847db1bc96
|
data/lib/onebox/engine.rb
CHANGED
@@ -28,23 +28,19 @@ module Onebox
|
|
28
28
|
end
|
29
29
|
end
|
30
30
|
|
31
|
-
attr_reader :url, :uri
|
32
|
-
attr_reader :timeout
|
31
|
+
attr_reader :url, :uri, :options, :timeout
|
33
32
|
attr :errors
|
34
33
|
|
35
34
|
DEFAULT = {}
|
36
|
-
def options
|
37
|
-
@options
|
38
|
-
end
|
39
35
|
|
40
36
|
def options=(opt)
|
41
|
-
return @options if opt.nil? #make sure options provided
|
42
|
-
opt = opt.to_h
|
37
|
+
return @options if opt.nil? # make sure options provided
|
38
|
+
opt = opt.to_h if opt.instance_of?(OpenStruct)
|
43
39
|
@options.merge!(opt)
|
44
40
|
@options
|
45
41
|
end
|
46
42
|
|
47
|
-
def initialize(
|
43
|
+
def initialize(url, timeout = nil)
|
48
44
|
@errors = {}
|
49
45
|
@options = DEFAULT
|
50
46
|
class_name = self.class.name.split("::").last.to_s
|
@@ -52,8 +48,8 @@ module Onebox
|
|
52
48
|
# Set the engine options extracted from global options.
|
53
49
|
self.options = Onebox.options[class_name] || {}
|
54
50
|
|
55
|
-
@url =
|
56
|
-
@uri = URI(
|
51
|
+
@url = url
|
52
|
+
@uri = URI(url)
|
57
53
|
if always_https?
|
58
54
|
@uri.scheme = 'https'
|
59
55
|
@url = @uri.to_s
|
@@ -27,7 +27,6 @@ module Onebox
|
|
27
27
|
500px.com
|
28
28
|
8tracks.com
|
29
29
|
abc.net.au
|
30
|
-
about.com
|
31
30
|
answers.com
|
32
31
|
arstechnica.com
|
33
32
|
ask.com
|
@@ -36,11 +35,9 @@ module Onebox
|
|
36
35
|
bbs.boingboing.net
|
37
36
|
bestbuy.ca
|
38
37
|
bestbuy.com
|
39
|
-
blip.tv
|
40
38
|
bloomberg.com
|
41
39
|
businessinsider.com
|
42
40
|
change.org
|
43
|
-
clikthrough.com
|
44
41
|
cnet.com
|
45
42
|
cnn.com
|
46
43
|
codepen.io
|
@@ -90,7 +87,6 @@ module Onebox
|
|
90
87
|
meetup.com
|
91
88
|
mixcloud.com
|
92
89
|
mlb.com
|
93
|
-
myshopify.com
|
94
90
|
myspace.com
|
95
91
|
nba.com
|
96
92
|
npr.org
|
@@ -98,16 +94,13 @@ module Onebox
|
|
98
94
|
photobucket.com
|
99
95
|
pinterest.com
|
100
96
|
reference.com
|
101
|
-
revision3.com
|
102
97
|
rottentomatoes.com
|
103
98
|
samsung.com
|
104
|
-
screenr.com
|
105
99
|
scribd.com
|
106
100
|
slideshare.net
|
107
101
|
sourceforge.net
|
108
102
|
speakerdeck.com
|
109
103
|
spotify.com
|
110
|
-
squidoo.com
|
111
104
|
streamable.com
|
112
105
|
techcrunch.com
|
113
106
|
ted.com
|
@@ -124,7 +117,6 @@ module Onebox
|
|
124
117
|
twitpic.com
|
125
118
|
usatoday.com
|
126
119
|
viddler.com
|
127
|
-
videojug.com
|
128
120
|
vine.co
|
129
121
|
walmart.com
|
130
122
|
washingtonpost.com
|
@@ -275,7 +267,6 @@ module Onebox
|
|
275
267
|
|
276
268
|
def rewrite_https(html)
|
277
269
|
return unless html
|
278
|
-
uri = URI(@url)
|
279
270
|
if AllowlistedGenericOnebox.host_matches(uri, AllowlistedGenericOnebox.rewrites)
|
280
271
|
html = html.gsub("http://", "https://")
|
281
272
|
end
|
@@ -19,10 +19,8 @@ module Onebox
|
|
19
19
|
# If possible, fetch the cached HTML body immediately so we can
|
20
20
|
# try to grab the canonical URL from that document,
|
21
21
|
# rather than guess at the best URL structure to use
|
22
|
-
if
|
23
|
-
|
24
|
-
@raw ||= Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
|
25
|
-
end
|
22
|
+
if !@raw && has_cached_body
|
23
|
+
@raw = Onebox::Helpers.fetch_html_doc(@url, http_params, body_cacher)
|
26
24
|
end
|
27
25
|
|
28
26
|
if @raw
|
@@ -31,7 +29,8 @@ module Onebox
|
|
31
29
|
end
|
32
30
|
|
33
31
|
if match && match[:id]
|
34
|
-
|
32
|
+
id = Addressable::URI.encode_component(match[:id], Addressable::URI::CharacterClasses::PATH)
|
33
|
+
return "https://www.amazon.#{tld}/dp/#{id}"
|
35
34
|
end
|
36
35
|
|
37
36
|
@url
|
@@ -49,6 +48,12 @@ module Onebox
|
|
49
48
|
|
50
49
|
private
|
51
50
|
|
51
|
+
def has_cached_body
|
52
|
+
body_cacher&.respond_to?('cache_response_body?') &&
|
53
|
+
body_cacher.cache_response_body?(uri.to_s) &&
|
54
|
+
body_cacher.cached_response_body_exists?(uri.to_s)
|
55
|
+
end
|
56
|
+
|
52
57
|
def match
|
53
58
|
@match ||= @url.match(/(?:d|g)p\/(?:product\/|video\/detail\/)?(?<id>[A-Z0-9]+)(?:\/|\?|$)/mi)
|
54
59
|
end
|
@@ -57,9 +62,9 @@ module Onebox
|
|
57
62
|
if (main_image = raw.css("#main-image")) && main_image.any?
|
58
63
|
attributes = main_image.first.attributes
|
59
64
|
|
60
|
-
|
61
|
-
|
62
|
-
|
65
|
+
if attributes["data-a-hires"]
|
66
|
+
return attributes["data-a-hires"].to_s
|
67
|
+
elsif attributes["data-a-dynamic-image"]
|
63
68
|
return ::JSON.parse(attributes["data-a-dynamic-image"].value).keys.first
|
64
69
|
end
|
65
70
|
end
|
@@ -67,9 +72,11 @@ module Onebox
|
|
67
72
|
if (landing_image = raw.css("#landingImage")) && landing_image.any?
|
68
73
|
attributes = landing_image.first.attributes
|
69
74
|
|
70
|
-
|
71
|
-
|
72
|
-
|
75
|
+
if attributes["data-old-hires"]
|
76
|
+
return attributes["data-old-hires"].to_s
|
77
|
+
else
|
78
|
+
return landing_image.first["src"].to_s
|
79
|
+
end
|
73
80
|
end
|
74
81
|
|
75
82
|
if (ebook_image = raw.css("#ebooksImgBlkFront")) && ebook_image.any?
|
@@ -91,16 +98,16 @@ module Onebox
|
|
91
98
|
end
|
92
99
|
|
93
100
|
def multiple_authors(authors_xpath)
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
101
|
+
raw
|
102
|
+
.xpath(authors_xpath)
|
103
|
+
.map { |a| a.inner_text.strip }
|
104
|
+
.join(", ")
|
98
105
|
end
|
99
106
|
|
100
107
|
def data
|
101
108
|
og = ::Onebox::OpenGraph.new(raw)
|
102
109
|
|
103
|
-
if raw.at_css('#dp.book_mobile') #printed books
|
110
|
+
if raw.at_css('#dp.book_mobile') # printed books
|
104
111
|
title = raw.at("h1#title")&.inner_text
|
105
112
|
authors = raw.at_css('#byline_secondary_view_div') ? multiple_authors("//div[@id='byline_secondary_view_div']//span[@class='a-text-bold']") : raw.at("#byline")&.inner_text
|
106
113
|
rating = raw.at("#averageCustomerReviews_feature_div .a-icon")&.inner_text || raw.at("#cmrsArcLink .a-icon")&.inner_text
|
@@ -32,7 +32,7 @@ module Onebox
|
|
32
32
|
<span class='album-title'>#{album_title}</span>
|
33
33
|
</span>
|
34
34
|
</span>
|
35
|
-
<img src='#{og.
|
35
|
+
<img src='#{og.secure_image_url}' #{og.title_attr} height='#{og.image_height}' width='#{og.image_width}'>
|
36
36
|
</a>
|
37
37
|
</div>
|
38
38
|
HTML
|
@@ -43,7 +43,7 @@ module Onebox
|
|
43
43
|
|
44
44
|
<<-HTML
|
45
45
|
<a href='#{escaped_url}' target='_blank' rel='noopener' class="onebox">
|
46
|
-
<img src='#{og.
|
46
|
+
<img src='#{og.secure_image_url}' #{og.title_attr} alt='Imgur' height='#{og.image_height}' width='#{og.image_width}'>
|
47
47
|
</a>
|
48
48
|
HTML
|
49
49
|
end
|
@@ -9,8 +9,8 @@ module Onebox
|
|
9
9
|
matches_regexp(/^https?:\/\/gfycat\.com\//)
|
10
10
|
always_https
|
11
11
|
|
12
|
+
# This engine should have priority over AllowlistedGenericOnebox.
|
12
13
|
def self.priority
|
13
|
-
# This engine should have priority over AllowlistedGenericOnebox.
|
14
14
|
1
|
15
15
|
end
|
16
16
|
|
@@ -21,6 +21,7 @@ module Onebox
|
|
21
21
|
<img src="https://gfycat.com/static/favicons/favicon-96x96.png" class="site-icon" width="64" height="64">
|
22
22
|
<a href="#{data[:url]}" target="_blank" rel="nofollow ugc noopener">Gfycat.com</a>
|
23
23
|
</header>
|
24
|
+
|
24
25
|
<article class="onebox-body">
|
25
26
|
<h4>
|
26
27
|
#{data[:title]} by
|
@@ -36,11 +37,12 @@ module Onebox
|
|
36
37
|
<img title="Sorry, your browser doesn't support HTML5 video." src="#{data[:posterUrl]}">
|
37
38
|
</video>
|
38
39
|
</div>
|
40
|
+
|
39
41
|
<p>
|
40
42
|
<span class="label1">#{data[:keywords]}</span>
|
41
43
|
</p>
|
42
|
-
|
43
44
|
</article>
|
45
|
+
|
44
46
|
<div style="clear: both"></div>
|
45
47
|
</aside>
|
46
48
|
HTML
|
@@ -61,52 +63,50 @@ module Onebox
|
|
61
63
|
@match ||= @url.match(/^https?:\/\/gfycat\.com\/(gifs\/detail\/)?(?<name>.+)/)
|
62
64
|
end
|
63
65
|
|
64
|
-
def
|
65
|
-
@
|
66
|
-
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
|
67
|
-
Nokogiri::HTML(response)
|
68
|
-
end
|
69
|
-
end
|
66
|
+
def og_data
|
67
|
+
return @og_data if defined?(@og_data)
|
70
68
|
|
71
|
-
|
72
|
-
|
69
|
+
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
|
70
|
+
page = Nokogiri::HTML(response)
|
71
|
+
script = page.at_css('script[type="application/ld+json"]')
|
73
72
|
|
74
|
-
if json_string =
|
75
|
-
og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
|
73
|
+
if json_string = script&.text
|
74
|
+
@og_data = Onebox::Helpers.symbolize_keys(::MultiJson.load(json_string))
|
75
|
+
else
|
76
|
+
@og_data = {}
|
76
77
|
end
|
77
|
-
|
78
|
-
og_data
|
79
78
|
end
|
80
79
|
|
81
80
|
def data
|
82
|
-
|
81
|
+
return @data if defined?(@data)
|
83
82
|
|
84
|
-
|
83
|
+
@data = {
|
85
84
|
name: match[:name],
|
86
85
|
title: og_data[:headline] || 'No Title',
|
87
86
|
author: og_data[:author],
|
88
|
-
url: @url
|
87
|
+
url: @url,
|
89
88
|
}
|
90
89
|
|
91
|
-
keywords = og_data[:keywords]&.split(',')
|
92
|
-
|
93
|
-
|
90
|
+
if keywords = og_data[:keywords]&.split(',')
|
91
|
+
@data[:keywords] = keywords
|
92
|
+
.map { |keyword| "<a href='https://gfycat.com/gifs/search/#{keyword}'>##{keyword}</a>" }
|
93
|
+
.join(' ')
|
94
94
|
end
|
95
95
|
|
96
96
|
if og_data[:video]
|
97
97
|
content_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:contentUrl])
|
98
98
|
video_url = Pathname.new(content_url)
|
99
|
-
|
100
|
-
|
99
|
+
@data[:webmUrl] = video_url.sub_ext(".webm").to_s
|
100
|
+
@data[:mp4Url] = video_url.sub_ext(".mp4").to_s
|
101
101
|
|
102
102
|
thumbnail_url = ::Onebox::Helpers.normalize_url_for_output(og_data[:video][:thumbnailUrl])
|
103
|
-
|
103
|
+
@data[:posterUrl] = thumbnail_url
|
104
104
|
|
105
|
-
|
106
|
-
|
105
|
+
@data[:width] = og_data[:video][:width]
|
106
|
+
@data[:height] = og_data[:video][:height]
|
107
107
|
end
|
108
108
|
|
109
|
-
|
109
|
+
@data
|
110
110
|
end
|
111
111
|
end
|
112
112
|
end
|
@@ -10,7 +10,7 @@ module Onebox
|
|
10
10
|
include JSON
|
11
11
|
include Onebox::Mixins::GithubBody
|
12
12
|
|
13
|
-
matches_regexp
|
13
|
+
matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com(?:\/)?(?:.)*\/commit\//)
|
14
14
|
always_https
|
15
15
|
|
16
16
|
def url
|
@@ -7,7 +7,7 @@ module Onebox
|
|
7
7
|
include StandardEmbed
|
8
8
|
include LayoutSupport
|
9
9
|
|
10
|
-
matches_regexp
|
10
|
+
matches_regexp(/^https?:\/\/(?:www\.)?(?:(?:\w)+\.)?(github)\.com[\:\d]*(\/[^\/]+){2}/)
|
11
11
|
always_https
|
12
12
|
|
13
13
|
def self.priority
|
@@ -4,61 +4,43 @@ module Onebox
|
|
4
4
|
module Engine
|
5
5
|
class GoogleDocsOnebox
|
6
6
|
include Engine
|
7
|
+
include StandardEmbed
|
7
8
|
include LayoutSupport
|
8
9
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
document: :docs,
|
17
|
-
presentation: :slides,
|
18
|
-
forms: :forms,
|
19
|
-
}
|
20
|
-
end
|
10
|
+
SUPPORTED_ENDPOINTS = %w(spreadsheets document forms presentation)
|
11
|
+
SHORT_TYPES = {
|
12
|
+
spreadsheets: :sheets,
|
13
|
+
document: :docs,
|
14
|
+
presentation: :slides,
|
15
|
+
forms: :forms,
|
16
|
+
}
|
21
17
|
|
22
|
-
matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?<endpoint>(#{
|
18
|
+
matches_regexp(/^(https?:)?\/\/(docs\.google\.com)\/(?<endpoint>(#{SUPPORTED_ENDPOINTS.join('|')}))\/d\/((?<key>[\w-]*)).+$/)
|
23
19
|
always_https
|
24
20
|
|
25
|
-
|
21
|
+
private
|
26
22
|
|
27
23
|
def data
|
28
|
-
og_data =
|
24
|
+
og_data = get_opengraph
|
25
|
+
short_type = SHORT_TYPES[match[:endpoint].to_sym]
|
26
|
+
|
27
|
+
description = if Onebox::Helpers.blank?(og_data.description)
|
28
|
+
"This #{short_type.to_s.chop.capitalize} is private"
|
29
|
+
else
|
30
|
+
Onebox::Helpers.truncate(og_data.description, 250)
|
31
|
+
end
|
32
|
+
|
29
33
|
{
|
30
34
|
link: link,
|
31
|
-
title: og_data
|
32
|
-
description:
|
33
|
-
type:
|
35
|
+
title: og_data.title || "Google #{short_type.to_s.capitalize}",
|
36
|
+
description: description,
|
37
|
+
type: short_type
|
34
38
|
}
|
35
39
|
end
|
36
40
|
|
37
|
-
def doc_type
|
38
|
-
@doc_type ||= match[:endpoint].to_sym
|
39
|
-
end
|
40
|
-
|
41
|
-
def shorttype
|
42
|
-
GoogleDocsOnebox.short_types[doc_type]
|
43
|
-
end
|
44
|
-
|
45
41
|
def match
|
46
42
|
@match ||= @url.match(@@matcher)
|
47
43
|
end
|
48
|
-
|
49
|
-
def get_og_data
|
50
|
-
response = Onebox::Helpers.fetch_response(url, redirect_limit: 10) rescue nil
|
51
|
-
html = Nokogiri::HTML(response)
|
52
|
-
og_data = {}
|
53
|
-
html.css('meta').each do |m|
|
54
|
-
if m.attribute('property') && m.attribute('property').to_s.match(/^og:/i)
|
55
|
-
m_content = m.attribute('content').to_s.strip
|
56
|
-
m_property = m.attribute('property').to_s.gsub('og:', '')
|
57
|
-
og_data[m_property.to_sym] = m_content
|
58
|
-
end
|
59
|
-
end
|
60
|
-
og_data
|
61
|
-
end
|
62
44
|
end
|
63
45
|
end
|
64
46
|
end
|
@@ -119,8 +119,6 @@ module Onebox
|
|
119
119
|
@placeholder = "https://maps.googleapis.com/maps/api/streetview?size=690x400&location=#{lon},#{lat}&pano=#{panoid}&fov=#{zoom}&heading=#{heading}&pitch=#{pitch}&sensor=false"
|
120
120
|
|
121
121
|
when :canonical
|
122
|
-
uri = URI(@url)
|
123
|
-
|
124
122
|
query = URI::decode_www_form(uri.query).to_h
|
125
123
|
if !query.has_key?("ll")
|
126
124
|
raise ArgumentError, "canonical url lacks location argument" unless query.has_key?("sll")
|
@@ -163,14 +161,20 @@ module Onebox
|
|
163
161
|
end
|
164
162
|
|
165
163
|
def follow_redirect!
|
166
|
-
uri = URI(@url)
|
167
164
|
begin
|
168
|
-
http = Net::HTTP.start(
|
169
|
-
|
170
|
-
|
165
|
+
http = Net::HTTP.start(
|
166
|
+
uri.host,
|
167
|
+
uri.port,
|
168
|
+
use_ssl: uri.scheme == 'https',
|
169
|
+
open_timeout: timeout,
|
170
|
+
read_timeout: timeout
|
171
|
+
)
|
171
172
|
|
173
|
+
response = http.head(uri.path)
|
172
174
|
raise "unexpected response code #{response.code}" unless %w(200 301 302).include?(response.code)
|
175
|
+
|
173
176
|
@url = response.code == "200" ? uri.to_s : response["Location"]
|
177
|
+
@uri = URI(@url)
|
174
178
|
ensure
|
175
179
|
http.finish rescue nil
|
176
180
|
end
|