From 57503eefd2cbf537018fd390f3438527c262b61b Mon Sep 17 00:00:00 2001 From: Pete Matsyburka Date: Fri, 27 Jun 2025 10:13:29 +0300 Subject: [PATCH] adjust search index --- lib/search_entries.rb | 30 +++++++++++++++++++++--------- lib/submitters.rb | 2 +- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/lib/search_entries.rb b/lib/search_entries.rb index 5281dbf3..cbd9061c 100644 --- a/lib/search_entries.rb +++ b/lib/search_entries.rb @@ -56,7 +56,7 @@ module SearchEntries end [sql, number, number.length > 1 ? number.delete_prefix('0') : number, keyword] - elsif keyword.match?(/[^\p{L}\d&@._\-]/) || keyword.match?(/\A['"].*['"]\z/) + elsif keyword.match?(/[^\p{L}\d&@.\-]/) || keyword.match?(/\A['"].*['"]\z/) ['tsvector @@ plainto_tsquery(?)', TextUtils.transliterate(keyword.downcase)] else keyword = TextUtils.transliterate(keyword.downcase).squish @@ -180,15 +180,21 @@ module SearchEntries end def index_template(template) - sql = SearchEntry.sanitize_sql_array( - ["SELECT to_tsvector(:text), to_tsvector('simple', :text)", - { text: TextUtils.transliterate(template.name.to_s.downcase).delete("\0") }] - ) + text = TextUtils.transliterate(template.name.to_s.downcase.squish).delete("\0") + + sql = SearchEntry.sanitize_sql_array(["SELECT to_tsvector(:text), to_tsvector('simple', :text)", { text: }]) entry = template.search_entry || template.build_search_entry entry.account_id = template.account_id entry.tsvector, ngram = SearchEntry.connection.select_rows(sql).first + + hyphens = text.scan(/\b[^\s]*?\d-[^\s]+?\b/) + text.scan(/\b[^\s]+-\d[^\s]*?\b/) + + hyphens.uniq.each_with_index do |item, index| + entry.tsvector += " '#{item.delete("'")}':#{index + 1}" unless entry.tsvector.include?(item) + end + entry.ngram = build_ngram(ngram) return if entry.tsvector.blank? @@ -205,15 +211,21 @@ module SearchEntries def index_submission(submission) return if submission.name.blank? - sql = SearchEntry.sanitize_sql_array( - ["SELECT to_tsvector(:text), to_tsvector('simple', :text)", - { text: TextUtils.transliterate(submission.name.to_s.downcase).delete("\0") }] - ) + text = TextUtils.transliterate(submission.name.to_s.downcase.squish).delete("\0") + + sql = SearchEntry.sanitize_sql_array(["SELECT to_tsvector(:text), to_tsvector('simple', :text)", { text: }]) entry = submission.search_entry || submission.build_search_entry entry.account_id = submission.account_id entry.tsvector, ngram = SearchEntry.connection.select_rows(sql).first + + hyphens = text.scan(/\b[^\s]*?\d-[^\s]+?\b/) + text.scan(/\b[^\s]+-\d[^\s]*?\b/) + + hyphens.uniq.each_with_index do |item, index| + entry.tsvector += " '#{item.delete("'")}':#{index + 1}" unless entry.tsvector.include?(item) + end + entry.ngram = build_ngram(ngram) return if entry.tsvector.blank? diff --git a/lib/submitters.rb b/lib/submitters.rb index 65bc4215..b5d84d48 100644 --- a/lib/submitters.rb +++ b/lib/submitters.rb @@ -53,7 +53,7 @@ module Submitters end [sql, number, weight, number.length > 1 ? number.delete_prefix('0') : number, weight] - elsif keyword.match?(/[^\p{L}\d&@._\-]/) + elsif keyword.match?(/[^\p{L}\d&@.\-]/) terms = TextUtils.transliterate(keyword.downcase).split(/\b/).map(&:squish).compact_blank.uniq if terms.size > 1