Skip to content

Commit 5e653ed

Browse files
Merge pull request #376 from MITLibraries/use-465-quepid-turnstile
Pass JSON requests through Turnstile challenge
2 parents a80a1f7 + b14c8df commit 5e653ed

File tree

2 files changed

+44
-30
lines changed

2 files changed

+44
-30
lines changed

app/controllers/search_controller.rb

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
class SearchController < ApplicationController
22
before_action :validate_q!, only: %i[results]
3-
before_action :validate_format_token, only: %i[results]
3+
before_action :authorized_request?, only: %i[results]
44
before_action :set_active_tab, only: %i[results]
5-
before_action :challenge_bots!, only: %i[results]
65
around_action :sleep_if_too_fast, only: %i[results]
76

87
before_action :validate_geobox_presence!, only: %i[results]
@@ -272,15 +271,6 @@ def validate_q!
272271
redirect_to root_url
273272
end
274273

275-
# Redirect suspected crawlers to Turnstile when the bot_detection feature is enabled.
276-
def challenge_bots!
277-
return unless Feature.enabled?(:bot_detection)
278-
return if session[:passed_turnstile]
279-
return unless BotDetector.should_challenge?(request)
280-
281-
redirect_to turnstile_path(return_to: request.fullpath)
282-
end
283-
284274
def validate_geodistance_presence!
285275
return unless Feature.enabled?(:geodata)
286276

@@ -409,30 +399,41 @@ def handle_primo_errors(error)
409399
}]
410400
end
411401

412-
# validate_format_token is only applicable to requests for JSON-format results. It takes no action so long as the
413-
# valid_request_for_json? method returns true - otherwise it renders an error message with a 401 Unauthorized status.
414-
def validate_format_token
415-
return unless request.format.json?
402+
# authorized_request? handles the verification that a request is valid. This validity is enforced in different ways
403+
# based on the requested format. Requests for results in JSON format need to be accompanied by a valid token.
404+
# Requests for results in other formats (HTML, primarily) are subject to review by BotDetector and Turnstile.
405+
#
406+
# If the request is authorized, it returns true (allowing the application to generate a response)
407+
# If the request is not authorized, it either renders an unauthorized error, or redirects the user to Turnstile
408+
# (depending on the requested format)
409+
def authorized_request?
410+
if request.format.json?
411+
return true if format_tokens_defined? && valid_token?
412+
413+
render json: { error: 'Unauthorized request' }, status: :unauthorized
414+
else
415+
return true unless Feature.enabled?(:bot_detection) # always pass if feature not enabled
416+
return true if session[:passed_turnstile]
416417

417-
return if valid_request_for_json?
418+
return true unless BotDetector.should_challenge?(request)
418419

419-
render json: { error: 'Unauthorized request' }, status: :unauthorized
420+
redirect_to turnstile_path(return_to: request.fullpath)
421+
end
420422
end
421423

422-
# valid_request_for_json? is responsible for validating whether a request for JSON format results is accompanied by
423-
# a token which matches the value defined in env.
424-
# 1. If the ENV is undefined, then the feature is not enabled - the check fails, which will prompt an Unauthorized
425-
# error.
426-
# 2. If the ENV is defined, and the provided token matches, then the check succeeds, and the request will be honored.
427-
# 3. In all other cases, the check fails, which will prompt the Unauthorized error.
428-
def valid_request_for_json?
429-
# Always fail unless the token is defined in ENV
430-
return false unless ENV.fetch('FORMAT_TOKEN', '').present?
424+
# format_tokens_defined? confirms whether a format token is defined in both the environment and query params.
425+
# Returns a boolean.
426+
def format_tokens_defined?
427+
return true if ENV.fetch('FORMAT_TOKEN', '').present? && params.key?(:format_token)
428+
429+
false
430+
end
431431

432-
# Success if tokens match
433-
return true if params[:format_token] == ENV.fetch('FORMAT_TOKEN', '')
432+
# valid_token? confirms whether the token received from the user matches the one defined in the environment. Returns
433+
# a boolean.
434+
def valid_token?
435+
return true if ActiveSupport::SecurityUtils.secure_compare(params[:format_token], ENV.fetch('FORMAT_TOKEN', ''))
434436

435-
# Otherwise fail
436437
false
437438
end
438439
end

test/controllers/search_controller_test.rb

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1127,9 +1127,22 @@ def source_filter_count(controller)
11271127

11281128
test 'results can be returned in JSON format when env is set and valid token is provided' do
11291129
secret_value = 'sooper_sekret'
1130+
quepid_ua = 'Quepid/1.0 (Web Scraper)'
11301131
ClimateControl.modify FORMAT_TOKEN: secret_value do
11311132
mock_timdex_search_with_hits(10)
1132-
get "/results?q=test&format=json&format_token=#{secret_value}"
1133+
get "/results?q=test&format=json&format_token=#{secret_value}", headers: { 'HTTP_USER_AGENT' => quepid_ua }
1134+
assert_response :success
1135+
assert_equal 'application/json; charset=utf-8', response.content_type
1136+
end
1137+
end
1138+
1139+
test 'results can be returned in JSON format when env is set and valid token is provided even with bot challenge' do
1140+
secret_value = 'sooper_sekret'
1141+
quepid_ua = 'Quepid/1.0 (Web Scraper)'
1142+
1143+
ClimateControl.modify(FORMAT_TOKEN: secret_value, FEATURE_BOT_DETECTION: 'true') do
1144+
mock_timdex_search_with_hits(10)
1145+
get "/results?q=test&format=json&format_token=#{secret_value}", headers: { 'HTTP_USER_AGENT' => quepid_ua }
11331146
assert_response :success
11341147
assert_equal 'application/json; charset=utf-8', response.content_type
11351148
end

0 commit comments

Comments
 (0)