The following query can be used to parse out "Thesis" call numbers into the major components (LC Class, LC Class Number, First Cutter, Second Cutter and Year). Thesis call numbers can be very variable, so the query does not work for everything!
Most problems appear to be in getting the second cutter - any suggestions for improvement are gratefully accepted! - JL 11-30-23
-- 11-22-23: This query parses out thesis call numbers into LC class, Lc class number, internal year, first cutter and second cutter
-- Does not work well with malformed call numbers or very unusual thesis call number schemes
-- 11-30-23: updated query to work with Law theses and sometimes for Thesis Film
-- 1. Select thesis holdings records where "Thesis" is part of the call number or call number prefix
WITH parameters AS
(SELECT
'%Mann%'::VARCHAR AS library_name_filter
),
recs AS (
SELECT
ll.library_name,
he.permanent_location_name,
he.holdings_hrid,
he.discovery_suppress,
he.call_number_prefix,
he.call_number
FROM folio_reporting.holdings_ext AS he
LEFT JOIN folio_reporting.locations_libraries AS ll
ON he.permanent_location_id = ll.location_id
WHERE (he.call_number ILIKE '%thesis%' OR he.call_number_prefix ILIKE '%thesis%')
AND (he.discovery_suppress = 'False' OR he.discovery_suppress IS NULL)
AND ll.library_name ILIKE (SELECT library_name_filter FROM parameters)
)
-- 2. Apply parsing formulas to call number to get LC class, LC class number, internal year, first cutter and second cutter
SELECT
recs.library_name,
recs.permanent_location_name,
recs.holdings_hrid,
recs.call_number_prefix,
recs.call_number,
-- get LC class
CASE
WHEN recs.call_number ILIKE '%film%' THEN ' - '
WHEN recs.call_number ILIKE '%thesis%' THEN SUBSTRING (recs.call_number, '\s[A-Za-z]{1,3}')
ELSE SUBSTRING (recs.call_number,'[A-Za-z]{1,3}') END AS lc_class,
-- get LC class number
CASE WHEN recs.call_number ILIKE '%film%' THEN NULL
ELSE
(TRIM (trailing '.' FROM
CASE
WHEN SUBSTRING (recs.call_number,1,1) in ('1','2') THEN NULL
WHEN SUBSTRING (recs.call_number,2,1) = ' ' THEN NULL
ELSE SUBSTRING (recs.call_number,'\d{1,4}\.{0,}\d{0,}')
END))
END AS lc_class_number,
-- get internal year
TRIM (CASE WHEN SUBSTRING (recs.call_number,1,1) IN ('1','2')
THEN SUBSTRING (recs.call_number,1,4)
ELSE SUBSTRING (recs.call_number, '\s\d{4}\s{0,1}')
END) AS internal_year,
-- get first cutter
CASE WHEN recs.library_name NOT LIKE 'Law%'
THEN SUBSTRING (recs.call_number, '\d\s{0,1}([A-Z]\d{1,})')
ELSE SUBSTRING (recs.call_number, '(\.[A-Z]\d{1,}[a-z]{0,1})')
END AS first_cutter,
-- get second cutter
CASE
WHEN SUBSTRING (recs.call_number, '\.\s{0,1}[A-Z]\d{1,}') IS NULL
OR
TRIM (SUBSTRING (recs.call_number, '\.\s{0,1}[A-Z]\d{1,}')) = TRIM (SUBSTRING (recs.call_number, '\s{1}\.{0,1}[A-Z]{1}\d{1,}[a-z]{0,1}'))
THEN NULL
ELSE SUBSTRING (recs.call_number, '\s{1}[A-Z]{1}\d{1,}x{0,}a{0,}b{0,}') END AS second_cutter
FROM recs
ORDER BY
permanent_location_name,
-- then order by LC class
lc_class,
-- then order by lc class number (using the entire CASE WHEN statement above)
CASE
WHEN SUBSTRING (recs.call_number,1,1) in ('1','2') THEN NULL
WHEN SUBSTRING (recs.call_number,2,1) = ' ' THEN NULL
ELSE SUBSTRING (recs.call_number,'\d{1,4}\.{0,}\d{0,}')
END::NUMERIC,
-- then order by internal year (using the entire CASE WHEN statement)
TRIM (CASE WHEN substring (recs.call_number,1,1) IN ('1','2')
THEN substring (recs.call_number,1,4)
ELSE SUBSTRING (recs.call_number, '\s\d{4}\s{0,1}')
END)::INTEGER,
-- then order by first cutter, then second cutter, then the whole call number in case the cutters are blank
first_cutter,
second_cutter,
recs.call_number
;