Module: SlugHelpers

Defined in:
backend/app/lib/slugs/slug_helpers.rb,
backend/app/lib/slugs/slug_helpers_generate.rb,
backend/app/lib/slugs/slug_helpers_eligibility.rb,
backend/app/lib/slugs/slug_helpers_generate_by_id.rb,
backend/app/lib/slugs/slug_helpers_generate_by_name.rb

Constant Summary collapse

AGENT_RECORD_TYPES =

TODO: build these record type lists dynamically instead of hard-coding them

[
  "AgentPerson",
  "AgentFamily",
  "AgentCorporateEntity",
  "AgentSoftware",
].freeze
# Class names of the non-agent record types. base_sluggable_class? tests
# membership in this list and slug_record_types resolves each name to a constant.
BASE_RECORD_TYPES = %w[
  Resource
  Subject
  DigitalObject
  Accession
  Classification
  ClassificationTerm
  ArchivalObject
  DigitalObjectComponent
].freeze
# Class names of the agent name record types; is_agent_name_type? tests
# membership in this list.
NAME_RECORD_TYPES = %w[
  NamePerson
  NameCorporateEntity
  NameFamily
  NameSoftware
].freeze

Class Method Summary collapse

Class Method Details

.base_slug_changed?(slug, previous_slug) ⇒ Boolean

Returns true if the base (non-deduped) slug differs between slug and previous_slug. Examples: slug = "foo", previous_slug = "foo_1" => false; slug = "foo_123", previous_slug = "foo_123_1" => false; slug = "foo_123", previous_slug = "foo_124" => true; slug = "foo_123", previous_slug = "foo_124_1" => true.

Returns:

  • (Boolean)


112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'backend/app/lib/slugs/slug_helpers_generate.rb', line 112

# Returns true if the base (non-deduped) slug differs between slug and
# previous_slug.
#
# The new slug is stripped from the front of the previous slug, then a
# trailing deduping suffix (underscore followed by digits, e.g. _1 or _314159)
# is stripped from what remains. Anything left over means the base changed.
#
# Examples:
#   slug = "foo",     previous_slug = "foo_1"     => false
#   slug = "foo_123", previous_slug = "foo_123_1" => false
#   slug = "foo_123", previous_slug = "foo_124"   => true
#   slug = "foo_123", previous_slug = "foo_124_1" => true
#
# @param slug [String, nil] the newly generated slug
# @param previous_slug [String, nil] the slug the record had before
# @return [Boolean]
def self.base_slug_changed?(slug, previous_slug)
  slug_blank     = slug.nil? || slug.empty?
  previous_blank = previous_slug.nil? || previous_slug.empty?

  # neither side carries a slug, so there is nothing that could have changed.
  # (guards against calling string methods on nil)
  return false if slug_blank && previous_blank

  # the base slug has changed if exactly one of the two is nil/empty
  return true if slug_blank || previous_blank

  # escape the slug so it is matched literally rather than as a pattern
  slug_difference = previous_slug.sub(/\A#{Regexp.escape(slug)}/, "")
                                 .sub(/_\d+\z/, "")

  # the base slug has changed if there is something left over in slug_difference
  !slug_difference.empty?
end

.base_sluggable_class?(klass) ⇒ Boolean

Returns:

  • (Boolean)


157
158
159
# File 'backend/app/lib/slugs/slug_helpers_eligibility.rb', line 157

# Reports whether klass, converted with to_s, is one of the names listed in
# BASE_RECORD_TYPES.
def self.base_sluggable_class?(klass)
  class_name = klass.to_s
  BASE_RECORD_TYPES.include?(class_name)
end

.cacheObject



3
4
5
# File 'backend/app/lib/slugs/slug_helpers_generate.rb', line 3

# Returns the Set held in @@cache, creating an empty one on first use.
def self.cache
  return @@cache if defined?(@@cache) && @@cache

  @@cache = Set.new
end

.cache_resetObject



7
8
9
# File 'backend/app/lib/slugs/slug_helpers_generate.rb', line 7

# Replaces @@cache with a brand new, empty Set.
def self.cache_reset
  @@cache = Set[]
end

.cache_setupObject

preload manually generated slugs into the cache



12
13
14
15
16
17
18
# File 'backend/app/lib/slugs/slug_helpers_generate.rb', line 12

# Preloads manually created slugs into the cache: for every class returned by
# slug_record_types, the slugs of rows with a non-NULL slug and
# is_slug_auto = 0 are merged into the cache.
def self.cache_setup
  manual_slug_filter = Sequel.&(Sequel.~(slug: nil), is_slug_auto: 0)

  slug_record_types.each do |klass|
    manual_slugs = klass.where(manual_slug_filter).select_map(:slug)
    cache.merge(manual_slugs)
  end
end

.clean_slug(slug) ⇒ Object

remove invalid chars and truncate slug NOTE: If changes are made here, then they should be also made in spec_slugs_helper.rb. Also, there may need to be a new migration if the cleaning changes need to be done on repository slugs, eg. migration 129



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'backend/app/lib/slugs/slug_helpers_generate.rb', line 44

# Removes invalid characters from slug and truncates it, then returns the
# result of calling parameterize on it. A nil slug is treated as "".
#
# NOTE: If changes are made here, then they should be also made in
# spec_slugs_helper.rb. Also, there may need to be a new migration if the
# cleaning changes need to be done on repository slugs, eg. migration 129
def self.clean_slug(slug)
  return "".parameterize unless slug

  # a slug containing two forward slashes next to each other, or a backslash,
  # is zeroed out completely. this is intended to revert an entity to use the
  # URI if the ID or name the slug was generated from is a URI.
  text = (slug =~ /\/\// || slug =~ /\\/) ? "" : slug

  # strip markup tags, then downcase to simplify case sensitivity issues
  text = text.gsub(/<\/?[^>]*>/, "").downcase

  # applied in order: spaces become underscores; double hyphens, en/em dashes,
  # single quotes and URL-reserved chars are removed
  [
    [" ", "_"],
    ["--", ""],
    [/[\u2013-\u2014]/, ""],
    ["'", ""],
    [/[&;?$<>#%{}|\\^~\[\]`\/\*\(\)@=:+,!.]/, ""],
  ].each do |pattern, replacement|
    text = text.gsub(pattern, replacement)
  end

  # enforce the 50 char limit, collapse runs of underscores into one, then
  # drop any leading or trailing underscore
  text = text.slice(0, 50).gsub(/_[_]+/, "_").gsub(/^_/, "").gsub(/_$/, "")

  # a purely numeric slug gets a leading '__', because numerical slugs will be
  # interpreted as an id by the controller
  text = "__" + text if text.match(/^(\d)+$/)

  text.parameterize
end

.dedupe_slug(dupe_slug, count) ⇒ Object

dupe_slug is already in use.



159
160
161
162
163
164
165
166
167
# File 'backend/app/lib/slugs/slug_helpers_generate.rb', line 159

# dupe_slug is already in use: append "_<count>" to it, incrementing count
# until slug_in_use? reports the candidate as free, and return that candidate.
def self.dedupe_slug(dupe_slug, count)
  candidate = "#{dupe_slug}_#{count}"

  while slug_in_use?(candidate)
    count += 1
    candidate = "#{dupe_slug}_#{count}"
  end

  candidate
end

.generate_slug_for_agent_name!(entity) ⇒ Object

Auto-generates a slug for the Agent associated with this AgentName, then finds that associated Agent and updates its slug. If for any reason we generate an empty slug, then turn autogen off for the agent.



6
7
8
9
# File 'backend/app/lib/slugs/slug_helpers_generate_by_name.rb', line 6

# Builds a name based slug for entity (using entity.class as the type hint)
# and passes it, together with entity, to update_agent_slug_from_name.
def self.generate_slug_for_agent_name!(entity)
  update_agent_slug_from_name(entity, name_based_slug_for(entity, entity.class))
end

.get_id_from_slug(slug, controller, action) ⇒ Object

Find the record given the slug, return id, table name, and repo_id. This is a gnarly decision tree because the query we’ll run depends on which controller is asking, and whether we’re scoping by repo slug or not.



12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'backend/app/lib/slugs/slug_helpers.rb', line 12

# Looks up the record that owns slug. Which lookup is run depends on the
# controller that is asking.
#
# Returns [id, table, repo_id] when a record is found; otherwise the id and
# repo_id positions are both -1.
def self.get_id_from_slug(slug, controller, action)
  rec, table =
    case controller
    when "repositories"
      [Repository.where(:slug => slug).first, "repository"]
    when "agents"
      self.find_slug_in_agent_tables(slug)
    when "subjects"
      [Subject.where(:slug => slug).first, "subject"]
    when "objects"
      self.find_slug_in_object_tables_any_repo(slug)
    else
      find_any_repo(slug, controller, action)
    end

  # Always return -1 if we can't find that slug
  return [-1, table, -1] unless rec

  [rec[:id], table, rec[:repo_id]]
end

.get_slugged_url_for_largetree(jsonmodel_type, repo_id, slug) ⇒ Object

Generates URLs for display in hierarchical tree links in the public interface for Archival Objects and Digital Object Components



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'backend/app/lib/slugs/slug_helpers.rb', line 37

# Builds the slug based public URL for a tree node.
#
# Returns "" when slug is missing or AppConfig[:use_human_readable_urls] is
# off. The URL is prefixed with /repositories/<repo slug> only when
# AppConfig[:repo_name_in_slugs] is on and the repository found for repo_id
# has a non-empty slug.
def self.get_slugged_url_for_largetree(jsonmodel_type, repo_id, slug)
  return "" unless slug && AppConfig[:use_human_readable_urls]

  repo_slug = ""
  if AppConfig[:repo_name_in_slugs]
    repo = Repository.first(:id => repo_id)
    repo_slug = repo.slug if repo && repo.slug
  end

  if repo_slug.empty?
    "#{AppConfig[:public_proxy_url]}/#{jsonmodel_type.underscore}s/#{slug}"
  else
    "#{AppConfig[:public_proxy_url]}/repositories/#{repo_slug}/#{jsonmodel_type.underscore}s/#{slug}"
  end
end

.id_based_slug_for(entity, klass) ⇒ Object

generate and return a string for a slug based on this thing’s ID.



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'backend/app/lib/slugs/slug_helpers_generate_by_id.rb', line 3

# Generates and returns a string for a slug based on this thing's ID.
#
# The field the slug is taken from depends on klass (and, for resources and
# archival objects, on AppConfig settings). The raw value is passed through
# clean_slug. If the cleaned base slug differs from the entity's current slug
# the result of run_dedupe_slug is returned, otherwise the current slug is
# returned unchanged.
#
# For agents the slug comes from the authority_id of the display name; when no
# name can be found the slug is treated as empty instead of raising.
#
# @param entity [Object] record data read with [] (and "names" for JSONModel agents)
# @param klass [Class] the kind of record entity represents
# @return [String, nil] the new slug, or the entity's previous slug (which may be nil)
def self.id_based_slug_for(entity, klass)
  if klass == Resource || klass == Accession
    if AppConfig[:generate_resource_slugs_with_eadid] && entity[:ead_id] && klass == Resource
      # use EADID if configured. Otherwise, use identifier.
      slug = entity[:ead_id]
    elsif entity.respond_to?(:format_multipart_identifier)
      slug = entity.format_multipart_identifier
    else
      slug = "#{entity[:id_0]}"
      slug += "-#{entity[:id_1]}" if entity[:id_1]
      slug += "-#{entity[:id_2]}" if entity[:id_2]
      slug += "-#{entity[:id_3]}" if entity[:id_3]
    end
  elsif klass == Classification || klass == ClassificationTerm
    slug = entity[:identifier]
  elsif klass == DigitalObject
    slug = entity[:digital_object_id]
  elsif klass == Repository
    slug = entity[:repo_code]
  elsif klass == ArchivalObject
    if AppConfig[:generate_archival_object_slugs_with_cuid]
      slug = entity[:component_id]
    else
      slug = entity[:ref_id]
    end
  elsif klass == DigitalObjectComponent
    slug = entity[:component_id]
  elsif klass == Subject
    slug = entity[:authority_id]
  # turned autogen on without updating any other data
  # should be JSON only
  elsif is_agent_type?(klass)
    if entity.class.to_s =~ /JSONModel/
      names = entity["names"] || []
      display_names = names.select { |n| n["is_display_name"] == true }
      # we should have a single primary name.
      # if we don't, then something's wrong and we use the first name as a fallback.
      primary_name = display_names.length == 1 ? display_names[0] : names[0]
      # no names at all: leave the slug nil so it is cleaned to ""
      slug = primary_name && primary_name["authority_id"]
    elsif is_agent_type?(entity.class)
      disp_name = get_json_for_agent(entity, klass)
      slug = disp_name && disp_name["authority_id"]
    end
  else
    slug = ""
  end

  slug = clean_slug(slug)

  # only de-dupe and update if our base slug has changed from its previous value
  previous_slug = entity[:slug]
  return run_dedupe_slug(slug) if base_slug_changed?(slug, previous_slug)

  previous_slug
end

.is_agent_name_type?(klass) ⇒ Boolean

Returns:

  • (Boolean)


149
150
151
# File 'backend/app/lib/slugs/slug_helpers_eligibility.rb', line 149

# Reports whether klass, converted with to_s, is one of the names listed in
# NAME_RECORD_TYPES.
def self.is_agent_name_type?(klass)
  class_name = klass.to_s
  NAME_RECORD_TYPES.include?(class_name)
end

.is_agent_type?(klass) ⇒ Boolean

Returns:

  • (Boolean)


153
154
155
# File 'backend/app/lib/slugs/slug_helpers_eligibility.rb', line 153

# Reports whether klass, converted with to_s, is one of the names listed in
# AGENT_RECORD_TYPES.
def self.is_agent_type?(klass)
  class_name = klass.to_s
  AGENT_RECORD_TYPES.include?(class_name)
end

.is_slug_auto_enabled?(entity) ⇒ Boolean

returns true if is_slug_auto is enabled for entity, or if we should treat it like it is

Returns:

  • (Boolean)


135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'backend/app/lib/slugs/slug_helpers_eligibility.rb', line 135

# Returns true if is_slug_auto is enabled for entity, or if we should treat it
# like it is.
def self.is_slug_auto_enabled?(entity)
  return true if entity[:is_slug_auto] == 1

  # the agent Name classes don't have slug fields, but if they are being
  # updated, we may need to update the associated agent.
  is_agent_name_type?(entity.class) ? true : false
end

.job_running(status: false) ⇒ Object



20
21
22
23
# File 'backend/app/lib/slugs/slug_helpers_generate.rb', line 20

# Records in @@running whether the job is running. Passing status: true first
# runs cache_setup; any other value runs cache_reset instead.
def self.job_running(status: false)
  if status == true
    cache_setup
  else
    cache_reset
  end

  @@running = status
end

.job_running?Boolean

Returns:

  • (Boolean)


25
26
27
# File 'backend/app/lib/slugs/slug_helpers_generate.rb', line 25

# Returns the value stored in @@running, storing and returning false when it
# has not been set (or is falsy).
def self.job_running?
  @@running = false unless defined?(@@running) && @@running
  @@running
end

.name_based_slug_for(entity, klass) ⇒ Object

Generate and return a string for a slug based on this thing’s title or name. Unlike #generate_slug_by_name!, this method does not set the slug on the passed in object (note, however, that when an agent name entity is a Hash its keys are converted to symbols in place). NOTE: ‘klass’ is passed in by the caller to give us a clue as to what kind of entity we’re working with. ‘entity’ is a data structure that has what we need. It may be a JSONModel or a Sequel object.



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'backend/app/lib/slugs/slug_helpers_generate_by_name.rb', line 17

# Generates and returns a string for a slug based on this thing's title or name.
#
# The slug itself is not written to entity. Be aware, though, that when klass
# is an agent name type and entity is a Hash, the Hash's keys are converted to
# symbols in place.
#
# The raw value is passed through clean_slug. If the cleaned base slug differs
# from the entity's current slug the result of run_dedupe_slug is returned,
# otherwise the current slug is returned unchanged.
#
# @param entity [Object] record data read with []; may be a Hash for agent names
# @param klass [Class] hint from the caller as to what kind of entity this is
# @return [String, nil] the new slug, or the entity's previous slug (which may be nil)
def self.name_based_slug_for(entity, klass)
  slug = nil

  if klass == Repository
    # Always use repo_code for repository slug
    slug = entity[:repo_code]
  # This codepath is run on updating slugs for agents, where we get either a Sequel Name object, or a Hash
  elsif is_agent_name_type?(klass)
    if entity.class == Hash
      # turn keys into symbols, that's what we expect down the line.
      # entity.keys is a snapshot, so re-keying while iterating is safe.
      entity.keys.each do |key|
        symbol_key = (key.respond_to?(:to_sym) && key.to_sym) || key
        entity[symbol_key] = entity.delete(key)
      end
      slug = get_agent_name_string_from_hash(entity, klass)
    elsif is_agent_name_type?(entity.class)
      slug = get_agent_name_string_from_sequel(entity, klass)
    end
  elsif !entity[:title].nil? && !entity[:title].empty?
    slug = entity[:title]
  elsif !entity[:name].nil? && !entity[:name].empty?
    slug = entity[:name]
  end

  slug = clean_slug(slug)

  # only de-dupe and update if our base slug has changed from its previous value
  previous_slug = entity[:slug]
  return run_dedupe_slug(slug) if base_slug_changed?(slug, previous_slug)

  previous_slug
end

.reset_autogenerated_slugsObject

for the generate_slugs_runner job: clear out previously autogenerated slugs so we don’t have to lookup if generated slugs are in use from before this job was run (cache_setup preloads manually created slugs)



33
34
35
36
37
# File 'backend/app/lib/slugs/slug_helpers_generate.rb', line 33

# For every class returned by slug_record_types, sets slug to NULL on the rows
# that have a slug and is_slug_auto = 1, i.e. clears previously autogenerated
# slugs.
def self.reset_autogenerated_slugs
  autogenerated_filter = Sequel.&(Sequel.~(slug: nil), is_slug_auto: 1)

  slug_record_types.each do |klass|
    autogenerated_rows = klass.where(autogenerated_filter)
    autogenerated_rows.update(slug: nil)
  end
end

.run_dedupe_slug(slug) ⇒ Object

runs dedupe if necessary



94
95
96
97
98
99
100
101
102
103
104
# File 'backend/app/lib/slugs/slug_helpers_generate.rb', line 94

# Runs dedupe if necessary: a non-empty slug that is already in use is
# replaced by the result of dedupe_slug. While a job is running the resulting
# slug is also added to the cache. Returns the resulting slug.
def self.run_dedupe_slug(slug)
  needs_dedupe = !slug.empty? && slug_in_use?(slug)
  slug = dedupe_slug(slug, 1) if needs_dedupe

  cache << slug if job_running?

  slug
end

.slug_data_updated?(obj) ⇒ Boolean

Determine if our record has updated a data field that indicates an autogenerated slug should be updated. Generally, we’ll always want to return true here and run the slug code if the record is brand new (hasn’t been persisted) slug will be updated iff this method returns true

Returns:

  • (Boolean)


35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'backend/app/lib/slugs/slug_helpers_eligibility.rb', line 35

# Determines if our record has updated a data field that indicates an
# autogenerated slug should be updated. The slug is updated iff this method
# returns true.
#
# For the record types handled below, a record that has not been persisted yet
# counts as having both its id field and its name field changed.
#
# @param obj [Object] record responding to exists?, column_changed? and []
# @return [Boolean]
def self.slug_data_updated?(obj)
  persisted               = obj.exists?
  slug_field_changed      = obj.column_changed?(:slug)
  slug_auto_field_changed = obj.column_changed?(:is_slug_auto)

  # which columns feed the id based and the name based slug for this record type
  id_column   = nil
  name_column = nil

  case obj.class.to_s
  when "Resource"
    id_column   = AppConfig[:generate_resource_slugs_with_eadid] ? :ead_id : :identifier
    name_column = :title
  when "Accession", "Classification", "ClassificationTerm"
    id_column   = :identifier
    name_column = :title
  when "DigitalObject"
    id_column   = :digital_object_id
    name_column = :title
  when "DigitalObjectComponent"
    id_column   = :component_id
    name_column = :title
  when "Repository"
    id_column   = :repo_code
    name_column = :name
  when "ArchivalObject"
    # read the setting only; it must never be assigned from here
    id_column   = AppConfig[:generate_archival_object_slugs_with_cuid] ? :component_id : :ref_id
    name_column = :title
  when "Subject"
    id_column   = :authority_id
    name_column = :title
  end

  id_field_changed   = !id_column.nil? && (obj.column_changed?(id_column) || !persisted)
  name_field_changed = !name_column.nil? && (obj.column_changed?(name_column) || !persisted)

  # for agent objects, the fields we need for name slugs are in a different table.
  # The only time we want to run slug code for agent classes is when the
  # is_slug_auto flag is toggled
  if is_agent_type?(obj.class)
    id_field_changed = false
    name_field_changed = false
  end

  # only run slug code for this AgentName if it's a display name
  if is_agent_name_type?(obj.class)
    id_field_changed = false
    name_field_changed = obj[:is_display_name] == 1
  end

  # auto-gen slugs has been switched from OFF to ON
  if slug_auto_field_changed && obj[:is_slug_auto] == 1
    true

  # auto-gen slugs is OFF, and slug field updated
  elsif obj[:is_slug_auto] == 0 && slug_field_changed
    true

  # auto-gen slugs is ON based on name, and name has changed
  elsif !AppConfig[:auto_generate_slugs_with_id] && name_field_changed
    true

  # auto-gen slugs is ON based on id, and id has changed
  elsif AppConfig[:auto_generate_slugs_with_id] && id_field_changed
    true

  # any other case, we can skip slug processing
  else
    false
  end
end

.slug_in_use?(slug) ⇒ Boolean

given a slug, return true if slug is used by another entity. return false otherwise.

Returns:

  • (Boolean)


150
151
152
153
154
155
156
# File 'backend/app/lib/slugs/slug_helpers_generate.rb', line 150

# Given a slug, returns true if it is already in use, false otherwise.
#
# While a job is running only the cache is consulted. Otherwise each class
# from slug_record_types, followed by Repository, is queried for rows with that
# slug; the search stops at the first class that has one.
#
# @param slug [String]
# @return [Boolean]
def self.slug_in_use?(slug)
  return cache.include?(slug) if job_running?

  (slug_record_types + [Repository]).any? do |klass|
    klass.where(:slug => slug).count > 0
  end
end

.slug_record_typesObject



28
29
30
# File 'backend/app/lib/slugs/slug_helpers_eligibility.rb', line 28

# Returns the constants named in AGENT_RECORD_TYPES followed by those named in
# BASE_RECORD_TYPES, looked up with Kernel.const_get. The list is built once
# and kept in @@slug_record_types.
def self.slug_record_types
  return @@slug_record_types if defined?(@@slug_record_types) && @@slug_record_types

  type_names = AGENT_RECORD_TYPES + BASE_RECORD_TYPES
  @@slug_record_types = type_names.map { |type_name| Kernel.const_get(type_name) }
end