pull/493/head
Pete Matsyburka 5 months ago
parent b6a1a3ee4d
commit 700184f776

@ -5,6 +5,7 @@
# Table name: search_entries
#
# id :bigint not null, primary key
# ngram :tsvector
# record_type :string not null
# tsvector :tsvector not null
# created_at :datetime not null
@ -14,6 +15,9 @@
#
# Indexes
#
# index_search_entries_on_account_id_ngram_submission (account_id,ngram) WHERE ((record_type)::text = 'Submission'::text) USING gin
# index_search_entries_on_account_id_ngram_submitter (account_id,ngram) WHERE ((record_type)::text = 'Submitter'::text) USING gin
# index_search_entries_on_account_id_ngram_template (account_id,ngram) WHERE ((record_type)::text = 'Template'::text) USING gin
# index_search_entries_on_account_id_tsvector_submission (account_id,tsvector) WHERE ((record_type)::text = 'Submission'::text) USING gin
# index_search_entries_on_account_id_tsvector_submitter (account_id,tsvector) WHERE ((record_type)::text = 'Submitter'::text) USING gin
# index_search_entries_on_account_id_tsvector_template (account_id,tsvector) WHERE ((record_type)::text = 'Template'::text) USING gin

@ -0,0 +1,14 @@
# frozen_string_literal: true
# Adds the nullable `ngram` tsvector column to `search_entries` together with
# one partial GIN index on (account_id, ngram) per indexed record type.
class AddNgramToSearchIndex < ActiveRecord::Migration[8.0]
  def change
    add_column :search_entries, :ngram, :tsvector

    # Each index is restricted to a single record_type so lookups scoped to
    # one type only have to scan that type's entries.
    %w[Submitter Submission Template].each do |record_type|
      add_index :search_entries, %i[account_id ngram],
                using: :gin,
                where: "record_type = '#{record_type}'",
                name: "index_search_entries_on_account_id_ngram_#{record_type.downcase}"
    end
  end
end

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[8.0].define(version: 2025_06_03_105556) do
ActiveRecord::Schema[8.0].define(version: 2025_06_08_163157) do
# These are extensions that must be enabled in order to support this database
enable_extension "btree_gin"
enable_extension "plpgsql"
@ -264,6 +264,10 @@ ActiveRecord::Schema[8.0].define(version: 2025_06_03_105556) do
t.tsvector "tsvector", null: false
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.tsvector "ngram"
t.index ["account_id", "ngram"], name: "index_search_entries_on_account_id_ngram_submission", where: "((record_type)::text = 'Submission'::text)", using: :gin
t.index ["account_id", "ngram"], name: "index_search_entries_on_account_id_ngram_submitter", where: "((record_type)::text = 'Submitter'::text)", using: :gin
t.index ["account_id", "ngram"], name: "index_search_entries_on_account_id_ngram_template", where: "((record_type)::text = 'Template'::text)", using: :gin
t.index ["account_id", "tsvector"], name: "index_search_entries_on_account_id_tsvector_submission", where: "((record_type)::text = 'Submission'::text)", using: :gin
t.index ["account_id", "tsvector"], name: "index_search_entries_on_account_id_tsvector_submitter", where: "((record_type)::text = 'Submitter'::text)", using: :gin
t.index ["account_id", "tsvector"], name: "index_search_entries_on_account_id_tsvector_template", where: "((record_type)::text = 'Template'::text)", using: :gin

@ -42,21 +42,43 @@ module SearchEntries
end
end
# Builds the condition for a free-text keyword search over search entries.
# Returns an array holding a SQL fragment followed by its bind values
# (positional values or a single named-bind hash); callers splat it into `where`.
def build_tsquery(keyword)
def build_tsquery(keyword, with_or_vector: false)
keyword = keyword.delete("\0")
# Keyword contains at least one digit and no letters: search on its digits only.
if keyword.match?(/\d/) && !keyword.match?(/\p{L}/)
number = keyword.gsub(/\D/, '')
["tsvector @@ ((quote_literal(?) || ':*')::tsquery || (quote_literal(?) || ':*')::tsquery || plainto_tsquery(?))",
number, number.length > 1 ? number.delete_prefix('0') : number, keyword]
# One- and two-digit numbers are matched against the ngram column (or a plain
# tsvector match); longer numbers are prefix-matched (`:*`) against tsvector.
sql =
if number.length <= 2
<<~SQL.squish
ngram @@ (quote_literal(?)::tsquery || quote_literal(?)::tsquery) OR tsvector @@ plainto_tsquery(?)
SQL
else
<<~SQL.squish
tsvector @@ ((quote_literal(?) || ':*')::tsquery || (quote_literal(?) || ':*')::tsquery || plainto_tsquery(?))
SQL
end
# Second bind: the number with one leading zero removed (only when it has more than one digit).
[sql, number, number.length > 1 ? number.delete_prefix('0') : number, keyword]
# Keyword contains a character other than letters, digits and & @ . _ - +,
# or is wrapped in quotes: plain full-text match on the whole phrase.
elsif keyword.match?(/[^\p{L}\d&@._\-+]/) || keyword.match?(/\A['"].*['"]\z/)
['tsvector @@ plainto_tsquery(?)', TextUtils.transliterate(keyword.downcase)]
# Otherwise the keyword is a single token.
else
[
"tsvector @@ (quote_literal(coalesce((ts_lexize('english_stem', :keyword))[1], :keyword)) || ':*')::tsquery",
{ keyword: TextUtils.transliterate(keyword.downcase).squish }
]
keyword = TextUtils.transliterate(keyword.downcase).squish
sql =
if keyword.length <= 2
arel = Arel.sql(<<~SQL.squish)
ngram @@ quote_literal(:keyword)::tsquery
SQL
# with_or_vector additionally ORs in a plain match against tsvector.
arel = Arel::Nodes::Or.new([arel, Arel.sql('tsvector @@ plainto_tsquery(:keyword)')]).to_sql if with_or_vector
arel
else
"tsvector @@ (quote_literal(coalesce((ts_lexize('english_stem', :keyword))[1], :keyword)) || ':*')::tsquery"
end
[sql, { keyword: }]
end
end
@ -78,25 +100,51 @@ module SearchEntries
["tsvector @@ (#{query.to_sql})", terms.index_by.with_index { |_, index| :"term#{index}" }.merge(weight:)]
end
# Builds a weight-restricted search condition for a single keyword.
#
# The keyword is downcased, transliterated and squished first. Keywords of
# one or two characters are matched exactly against the `ngram` column;
# longer keywords are prefix-matched (`:*`) against `tsvector`, using the
# english_stem lexeme when ts_lexize yields one and the raw keyword otherwise.
#
# @param keyword [String] raw search term
# @param weight [String] tsquery weight label(s) appended to the query term
# @return [Array] SQL fragment followed by the named-bind hash (:keyword, :weight)
def build_weights_wildcard_tsquery(keyword, weight)
  normalized = TextUtils.transliterate(keyword.downcase).squish

  condition =
    if normalized.length > 2
      "tsvector @@ (quote_literal(coalesce((ts_lexize('english_stem', :keyword))[1], :keyword)) || ':*' || :weight)::tsquery"
    else
      "ngram @@ (quote_literal(:keyword) || ':' || :weight)::tsquery"
    end

  [condition, { keyword: normalized, weight: weight }]
end
def index_submitter(submitter)
return if submitter.email.blank? && submitter.phone.blank? && submitter.name.blank?
email_phone_name = [
[submitter.email.to_s, submitter.email.to_s.split('@').last].join(' ').delete("\0"),
[submitter.phone.to_s.gsub(/\D/, ''),
submitter.phone.to_s.gsub(PhoneCodes::REGEXP, '').gsub(/\D/, '')].uniq.join(' ').delete("\0"),
TextUtils.transliterate(submitter.name).delete("\0")
]
sql = SearchEntry.sanitize_sql_array(
[
"SELECT setweight(to_tsvector(?), 'A') || setweight(to_tsvector(?), 'B') ||
setweight(to_tsvector(?), 'C') || setweight(to_tsvector(?), 'D')".squish,
[submitter.email.to_s, submitter.email.to_s.split('@').last].join(' ').downcase.delete("\0"),
[submitter.phone.to_s.gsub(/\D/, ''),
submitter.phone.to_s.gsub(PhoneCodes::REGEXP, '').gsub(/\D/, '')].uniq.join(' ').delete("\0"),
TextUtils.transliterate(submitter.name.to_s.downcase).delete("\0"),
build_submitter_values_string(submitter)
setweight(to_tsvector(?), 'C') || setweight(to_tsvector(?), 'D') as tsvector,
setweight(to_tsvector('simple', ?), 'A') ||
setweight(to_tsvector('simple', ?), 'B') ||
setweight(to_tsvector('simple', ?), 'C') as ngram".squish,
*email_phone_name,
build_submitter_values_string(submitter),
*email_phone_name
]
)
entry = submitter.search_entry || submitter.build_search_entry
entry.account_id = submitter.account_id
entry.tsvector = SearchEntry.connection.select_value(sql)
entry.tsvector, ngram = SearchEntry.connection.select_rows(sql).first
entry.ngram = build_ngram(ngram)
return if entry.tsvector.blank?
@ -122,13 +170,15 @@ module SearchEntries
def index_template(template)
sql = SearchEntry.sanitize_sql_array(
['SELECT to_tsvector(?)', TextUtils.transliterate(template.name.to_s.downcase).delete("\0")]
["SELECT to_tsvector(:text), to_tsvector('simple', :text)",
{ text: TextUtils.transliterate(template.name.to_s.downcase).delete("\0") }]
)
entry = template.search_entry || template.build_search_entry
entry.account_id = template.account_id
entry.tsvector = SearchEntry.connection.select_value(sql)
entry.tsvector, ngram = SearchEntry.connection.select_rows(sql).first
entry.ngram = build_ngram(ngram)
return if entry.tsvector.blank?
@ -145,13 +195,15 @@ module SearchEntries
return if submission.name.blank?
sql = SearchEntry.sanitize_sql_array(
['SELECT to_tsvector(?)', TextUtils.transliterate(submission.name.to_s.downcase).delete("\0")]
["SELECT to_tsvector(:text), to_tsvector('simple', :text)",
{ text: TextUtils.transliterate(submission.name.to_s.downcase).delete("\0") }]
)
entry = submission.search_entry || submission.build_search_entry
entry.account_id = submission.account_id
entry.tsvector = SearchEntry.connection.select_value(sql)
entry.tsvector, ngram = SearchEntry.connection.select_rows(sql).first
entry.ngram = build_ngram(ngram)
return if entry.tsvector.blank?
@ -163,4 +215,14 @@ module SearchEntries
retry
end
# Derives the short-prefix tsvector text stored in the `ngram` column.
#
# Every entry of the given tsvector text (e.g. `'john':1A`) is replaced by two
# entries that keep its position/weight suffix: one for the lexeme's first two
# characters and one for its first character (`'jo':1A 'j':1A`).
#
# @param ngram [String, nil] text form of a tsvector as returned by the database
# @return [String] space-separated prefix entries; empty for nil or empty input
def build_ngram(ngram)
  prefixes =
    # Entries are separated by whitespace that is followed by the opening
    # quote of the next lexeme. `to_s` keeps a NULL result from raising.
    ngram.to_s.split(/\s(?=')/).flat_map do |entry|
      [2, 1].map do |length|
        entry.sub(/'(.*?)':/) { "'#{Regexp.last_match(1)[0, length]}':" }
      end
    end

  # Collapse entries that differ only in position. The `+` matters: without it
  # only one character is normalised, so multi-digit positions and position
  # lists (`:10C`, `:1,2`) would never be treated as duplicates.
  prefixes.uniq { |entry| entry.sub(/':[\d,]+/, "':1") }.join(' ')
end
end

@ -60,7 +60,7 @@ module Submissions
arel = Arel::Nodes::Union.new(
arel, Submitter.joins(:search_entry)
.where(search_entry: { account_id: current_user.account_id })
.where(*SearchEntries.build_tsquery(keyword))
.where(*SearchEntries.build_tsquery(keyword, with_or_vector: true))
.select(:submission_id).arel
)

@ -45,21 +45,24 @@ module Submitters
if keyword.match?(/\d/) && !keyword.match?(/\p{L}/)
number = keyword.gsub(/\D/, '')
["tsvector @@ ((quote_literal(?) || ':*#{weight}')::tsquery || (quote_literal(?) || ':*#{weight}')::tsquery)",
number, number.length > 1 ? number.delete_prefix('0') : number]
sql =
if number.length <= 2
"ngram @@ ((quote_literal(?) || ':' || ?)::tsquery || (quote_literal(?) || ':' || ?)::tsquery)"
else
"tsvector @@ ((quote_literal(?) || ':*' || ?)::tsquery || (quote_literal(?) || ':*' || ?)::tsquery)"
end
[sql, number, weight, number.length > 1 ? number.delete_prefix('0') : number, weight]
elsif keyword.match?(/[^\p{L}\d&@._\-+]/)
terms = TextUtils.transliterate(keyword.downcase).split(/\b/).map(&:squish).compact_blank.uniq
if terms.size > 1
SearchEntries.build_weights_tsquery(terms, weight)
else
[
SearchEntries::FIELD_SEARCH_QUERY_SQL,
{ keyword: TextUtils.transliterate(keyword.downcase).squish, weight: }
]
SearchEntries.build_weights_wildcard_tsquery(keyword, weight)
end
else
[SearchEntries::FIELD_SEARCH_QUERY_SQL, { keyword: TextUtils.transliterate(keyword.downcase).squish, weight: }]
SearchEntries.build_weights_wildcard_tsquery(keyword, weight)
end
submitters.where(

Loading…
Cancel
Save