Class: PUIIndexer
Constant Summary
collapse
- PUI_RESOLVES =
AppConfig[:record_inheritance_resolves]
PeriodicIndexer::WORKER_STATUS_INDEX_ERROR, PeriodicIndexer::WORKER_STATUS_INDEX_SUCCESS, PeriodicIndexer::WORKER_STATUS_NOTHING_INDEXED
IndexerCommon::EXCLUDED_STRING_VALUE_PROPERTIES
Constants included
from JSONModel
JSONModel::REFERENCE_KEY_REGEX
Class Method Summary
collapse
Instance Method Summary
collapse
#handle_deletes, #log, #run, #run_index_round, #start_worker_thread
#add_agents, #add_arks, add_attribute_to_resolve, #add_audit_info, #add_batch_hook, #add_delete_hook, #add_document_prepare_hook, #add_extents, #add_extra_documents_hook, add_indexer_initialize_hook, #add_level, #add_notes, #add_subjects, #add_subjects_subrecord, #add_summary, #add_years, #apply_pui_fields, build_fullrecord, #clean_for_sort, #clean_whitespace, #dedupe_by_uri, #delete_records, #do_http_request, #enum_fields, extract_string_values, generate_permutations_for_identifier, generate_sort_string_for_identifier, generate_years_for_date_range, #get_record_scope, #index_batch, #index_records, #is_repository_unpublished?, #login, pause, paused?, #paused?, #record_has_children, #records_with_children, #reset_session, #sanitize_json, #send_commit, #solr_url, #t, #trim_ark_value
Methods included from JSONModel
JSONModel, #JSONModel, add_error_handler, all, allow_unmapped_enum_value, backend_url, check_valid_refs, client_mode?, custom_validations, destroy_model, enum_default_value, enum_values, handle_error, init, load_schema, #models, models, parse_jsonmodel_ref, parse_reference, repository, repository_for, schema_src, set_publish_flags!, set_repository, strict_mode, strict_mode?, validate_schema, with_repository
Constructor Details
#initialize(backend = nil, state = nil, name) ⇒ PUIIndexer
Returns a new instance of PUIIndexer.
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
# File 'indexer/app/lib/pui_indexer.rb', line 12
# Builds a PUI indexer.
#
# The index state is either the one supplied by the caller or a fresh instance
# of the class named by AppConfig[:index_state_class], created with the name
# "indexer_pui_state". After delegating to the superclass constructor, the
# record-inheritance schemas are prepared and the indexer's tuning values are
# read from AppConfig.
#
# Because +name+ is a required trailing parameter, a two-argument call binds
# its first argument to +backend+ (not +state+).
#
# @param backend forwarded untouched to the superclass constructor
# @param state optional pre-built index state; a default one is created when nil/false
# @param name forwarded untouched to the superclass constructor
def initialize(backend = nil, state = nil, name)
  # The state class is resolved even when a state was passed in, so a bad
  # AppConfig[:index_state_class] value fails here in both cases.
  default_state_class = AppConfig[:index_state_class].constantize

  super(backend, state || default_state_class.new("indexer_pui_state"), name)

  RecordInheritance.prepare_schemas

  @time_to_sleep = AppConfig[:pui_indexing_frequency_seconds].to_i
  @thread_count = AppConfig[:pui_indexer_thread_count].to_i
  @records_per_thread = AppConfig[:pui_indexer_records_per_thread].to_i

  # Ids of unpublished documents staged for removal from the index; held in a
  # synchronized Java list (presumably because several worker threads add to it).
  @unpublished_records = java.util.Collections.synchronizedList(java.util.ArrayList.new)
end
|
Class Method Details
.get_indexer(state = nil, name = "PUI Indexer") ⇒ Object
37
38
39
|
# File 'indexer/app/lib/pui_indexer.rb', line 37
# Factory for a PUI indexer.
#
# The constructor signature is (backend = nil, state = nil, name). With only
# two positional arguments Ruby binds the first one to +backend+, so the
# backend slot is filled with an explicit nil here to make sure +state+ really
# reaches the constructor's +state+ parameter.
#
# @param state optional index state handed to the constructor
# @param name [String] indexer name handed to the constructor
# @return the newly constructed indexer
def self.get_indexer(state = nil, name = "PUI Indexer")
  new(nil, state, name)
end
|
Instance Method Details
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
# File 'indexer/app/lib/pui_indexer.rb', line 112
# Appends one "ordered records" document per resource URI to +batch+.
#
# For every URI the "<uri>/ordered_records" endpoint is fetched through
# JSONModel::HTTP and the serialized response is stored in the document's
# 'json' field. The document is keyed by that same ordered_records URI and
# points back at the resource through 'pui_parent_id'.
#
# @param resource_uris collection of resource URI strings (iterated with #each)
# @param batch receiver of the generated documents (only #<< is used)
# @return the result of resource_uris.each
def add_infscroll_docs(resource_uris, batch)
  resource_uris.each do |resource_uri|
    ordered_records = JSONModel::HTTP.get_json(resource_uri + '/ordered_records')

    infscroll_doc = {
      'id' => "#{resource_uri}/ordered_records",
      'uri' => "#{resource_uri}/ordered_records",
      'pui_parent_id' => resource_uri,
      'publish' => "true",
      'primary_type' => "resource_ordered_records",
      'types' => ['pui'],
      'json' => ASUtils.to_json(ordered_records)
    }

    batch << infscroll_doc
  end
end
|
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
# File 'indexer/app/lib/pui_indexer.rb', line 51
# Extends the inherited document rules with PUI-specific behaviour.
#
# After running the superclass rules this:
# * registers the six tree record types through record_has_children;
# * adds a prepare hook that re-keys documents of inheritance-enabled types
#   to "<id>#pui", remembers the original id in 'pui_parent_id' and tags the
#   document with the PUI type markers;
# * adds a second prepare hook that stores the inheritance-merged JSON,
#   copies the title, strips 'ancestors' plus container notes from the record,
#   and rebuilds the 'fullrecord' field.
#
# Both hooks do nothing for documents whose primary type is not known to
# RecordInheritance.
def configure_doc_rules
  super

  %w[resource archival_object digital_object digital_object_component
     classification classification_term].each do |tree_type|
    record_has_children(tree_type)
  end

  # Hook 1: give the PUI copy of the document its own id and type markers.
  add_document_prepare_hook do |doc, _record|
    next unless RecordInheritance.has_type?(doc['primary_type'])

    original_id = doc['id']
    doc['id'] = "#{original_id}#pui"
    doc['pui_parent_id'] = original_id

    doc['types'] ||= []
    doc['types'].push('pui', "pui_#{doc['primary_type']}", 'pui_record', 'pui_only')
  end

  # Hook 2: merged JSON, title, and a fullrecord built from a trimmed record.
  add_document_prepare_hook do |doc, record|
    next unless RecordInheritance.has_type?(doc['primary_type'])

    # NOTE(review): the JSON is serialized before the container notes below
    # are removed from the record; whether they can leak into 'json' depends
    # on whether RecordInheritance.merge copies its input - confirm.
    doc['json'] = ASUtils.to_json(RecordInheritance.merge(record['record'],
                                                          :remove_ancestors => true))
    doc['title'] = record['record']['title'] if record['record']['title']

    record['record'].delete('ancestors')

    (record['record']['instances'] || []).each do |instance|
      sub_container = instance['sub_container']
      top_container = sub_container && sub_container['top_container']
      resolved_container = top_container && top_container['_resolved']
      next unless resolved_container

      resolved_container.delete('internal_note')

      container_profile = resolved_container['container_profile']
      resolved_profile = container_profile && container_profile['_resolved']
      resolved_profile.delete('notes') if resolved_profile
    end

    doc['fullrecord'] = IndexerCommon.build_fullrecord(record)
  end
end
|
#fetch_records(type, ids, resolve) ⇒ Object
28
29
30
31
32
33
34
35
|
# File 'indexer/app/lib/pui_indexer.rb', line 28
# Loads the records of +type+ with the given ids and, for record types known
# to RecordInheritance, runs them through RecordInheritance.merge with
# :direct_only => true before returning them.
#
# @param type record type passed to JSONModel() and RecordInheritance.has_type?
# @param ids [Array] record ids; sent as a comma-joined 'id_set'
# @param resolve value sent as the 'resolve[]' request parameter
# @return the fetched records, merged when the type supports inheritance
def fetch_records(type, ids, resolve)
  fetched = JSONModel(type).all(:id_set => ids.join(","), 'resolve[]' => resolve)
  return fetched unless RecordInheritance.has_type?(type)

  RecordInheritance.merge(fetched, :direct_only => true)
end
|
#index_round_complete(repository) ⇒ Object
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
|
# File 'indexer/app/lib/pui_indexer.rb', line 145
# Post-processing step run for +repository+ once an index round has finished.
#
# For each (root type, node type) tree pair it collects the ids of roots that
# were modified - directly, or through one of their modified nodes - since the
# stored mtime minus @window_seconds. For those trees it then:
# 1. adds "ordered records" documents (resource trees only) and large-tree
#    documents to a fresh batch, indexes the batch and sends a commit when the
#    batch is non-empty;
# 2. deletes the ids reported by the tree indexer, in slices of 100;
# 3. runs handle_deletes, then deletes and clears the staged unpublished ids;
# 4. stores the time this method started as the new mtime for every type.
#
# @param repository object responding to #id and #repo_code
# @return [Array] the [repository, type, time] checkpoints that were stored
def index_round_complete(repository)
  round_started_at = Time.now
  tree_type_pairs = [[:resource, :archival_object],
                     [:digital_object, :digital_object_component],
                     [:classification, :classification_term]]

  pending_checkpoints = []
  changed_tree_uris = []

  tree_type_pairs.each do |root_type, node_type|
    pending_checkpoints.push([repository, root_type, round_started_at],
                             [repository, node_type, round_started_at])

    # Look back an extra @window_seconds, but never before time zero.
    roots_since = [@state.get_last_mtime(repository.id, root_type) - @window_seconds, 0].max
    nodes_since = [@state.get_last_mtime(repository.id, node_type) - @window_seconds, 0].max

    changed_root_ids = Set.new(JSONModel::HTTP.get_json(JSONModel(root_type).uri_for,
                                                        :all_ids => true,
                                                        :modified_since => roots_since))
    changed_node_ids = JSONModel::HTTP.get_json(JSONModel(node_type).uri_for,
                                                :all_ids => true,
                                                :modified_since => nodes_since)

    # A modified node marks its owning root as changed as well.
    changed_node_ids.each_slice(@records_per_thread) do |id_slice|
      JSONModel(node_type).all(:id_set => id_slice.join(","), 'resolve[]' => []).each do |node|
        changed_root_ids << JSONModel.parse_reference(node[root_type.to_s]['ref']).fetch(:id)
      end
    end

    changed_root_ids.each { |root_id| changed_tree_uris << JSONModel(root_type).uri_for(root_id) }
  end

  extra_docs = IndexBatch.new

  resource_tree_uris = changed_tree_uris.select do |tree_uri|
    JSONModel.parse_reference(tree_uri).fetch(:type) == 'resource'
  end
  add_infscroll_docs(resource_tree_uris, extra_docs)

  tree_indexer = LargeTreeDocIndexer.new(extra_docs)
  tree_indexer.add_largetree_docs(changed_tree_uris)

  if extra_docs.length > 0
    log "Indexed #{extra_docs.length} additional PUI records in repository #{repository.repo_code}"
    index_batch(extra_docs, nil, :parent_id_field => 'pui_parent_id')
    send_commit
  end

  # Slicing an empty collection yields nothing, so no emptiness check is needed.
  tree_indexer.deletes.each_slice(100) do |stale_ids|
    delete_records(stale_ids, :parent_id_field => 'pui_parent_id')
  end

  handle_deletes(:parent_id_field => 'pui_parent_id')

  # Drop everything the skip_index_* checks staged during this round.
  delete_records(@unpublished_records, :parent_id_field => 'pui_parent_id')
  @unpublished_records.clear()

  # Checkpoints are only written once all of the work above has run.
  pending_checkpoints.each do |checkpoint_repo, record_type, timestamp|
    @state.set_last_mtime(checkpoint_repo.id, record_type, timestamp)
  end
end
|
#record_types ⇒ Object
45
46
47
48
49
|
# File 'indexer/app/lib/pui_indexer.rb', line 45
# Record types handled by this indexer: the inherited types that
# RecordInheritance knows about, plus :archival_object, without duplicates and
# in first-seen order.
#
# @return [Array] the filtered, de-duplicated list of record types
def record_types
  inheriting_types = super.select { |type| RecordInheritance.has_type?(type) }

  # Array#| is an order-preserving union, i.e. concatenation followed by uniq.
  inheriting_types | [:archival_object]
end
|
#resolved_attributes ⇒ Object
41
42
43
|
# File 'indexer/app/lib/pui_indexer.rb', line 41
# Attributes to resolve when fetching records: the inherited list followed by
# the entries of PUI_RESOLVES.
#
# @return a new collection; the inherited one is not modified by the +
def resolved_attributes
  inherited_attributes = super
  inherited_attributes + PUI_RESOLVES
end
|
#skip_index_doc?(doc) ⇒ Boolean
137
138
139
140
141
142
143
|
# File 'indexer/app/lib/pui_indexer.rb', line 137
# Decides whether a prepared document should be left out of the index.
#
# A document is skipped when its 'publish' value is nil or false; in that case
# its id is also handed to stage_unpublished_for_deletion. Any other value
# (including a non-empty string) counts as published.
#
# @param doc document responding to #[] with 'publish' and 'id'
# @return [Boolean] true when the document must be skipped
def skip_index_doc?(doc)
  return false if doc['publish']

  stage_unpublished_for_deletion(doc['id'])
  true
end
|
#skip_index_record?(record) ⇒ Boolean
128
129
130
131
132
133
134
|
# File 'indexer/app/lib/pui_indexer.rb', line 128
# Decides whether a fetched record should be left out of the index.
#
# The record is skipped when record['record']['publish'] is nil or false; in
# that case "<record uri>#pui" is handed to stage_unpublished_for_deletion.
#
# @param record wrapper responding to #[] whose 'record' entry holds 'publish' and 'uri'
# @return [Boolean] true when the record must be skipped
def skip_index_record?(record)
  payload = record['record']
  return false if payload['publish']

  stage_unpublished_for_deletion("#{payload['uri']}#pui")
  true
end
|
#stage_unpublished_for_deletion(doc_id) ⇒ Object
213
214
215
|
# File 'indexer/app/lib/pui_indexer.rb', line 213
# Remembers +doc_id+ in @unpublished_records when it matches /#pui$/.
# Ids that do not match are ignored.
#
# Note that `$` anchors at the end of a line, so an id ending in "#pui"
# followed by a newline matches as well.
#
# @param doc_id id of the document to stage
# @return the result of @unpublished_records.add, or nil when nothing was staged
def stage_unpublished_for_deletion(doc_id)
  return unless doc_id =~ /#pui$/

  @unpublished_records.add(doc_id)
end
|