-
Notifications
You must be signed in to change notification settings - Fork 2
/
koha_610a_duplicates.php
126 lines (102 loc) · 3.6 KB
/
koha_610a_duplicates.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
<?php
/*
* koha_610a_duplicates.php -- Identifies duplicate subfields in field 610.
* Copyright (C) 2017 Andreas Roussos
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
// The following dependency can be installed using `pear`.
require 'File/MARCXML.php' ;
// Wall-clock start; read again at the end of the script to report run time.
$time_start = microtime ( true ) ;
// Open the <PRE> element that wraps the report output.
echo "<PRE>\n" ;
// Please fill in the next four variables.
$dbhost = '' ;
$dbuser = '' ;
$dbpass = '' ;
$dbname = '' ;
$conn = mysqli_connect ( $dbhost, $dbuser, $dbpass, $dbname ) ;
// mysqli_connect() returns false when the connection cannot be made, so test
// the handle itself. mysqli_connect_error() takes no arguments: passing the
// handle (as was done before) is an error and hid connection failures.
if ( ! $conn ) {
    printf ( "Connect failed: %s\n", mysqli_connect_error ( ) ) ;
    exit ;
}
// Ask the server to talk utf8 on this connection before any data is read.
if ( ! mysqli_set_charset ( $conn, "utf8" ) ) {
    printf (
        "Error loading character set utf8: %s\n", mysqli_error ( $conn ) ) ;
    exit ;
}
// Select the biblionumber and the MARCXML payload of every biblio_metadata
// row whose format is 'marcxml'.
$query =
"SELECT
biblionumber,
metadata AS marcxml
FROM
biblio_metadata
WHERE
format = 'marcxml'" ;
$res = mysqli_query ( $conn, $query ) ;
// A falsy result means the query could not be run: report why and stop.
if ( ! $res ) {
    printf ( "mysqli_query failed: %s\n", mysqli_error ( $conn ) ) ;
    exit ;
}
// Walk every fetched row and report each 610 $a value that occurs more than
// once within the same biblio. An empty result set simply skips the loop, so
// the script still reaches its normal clean-up code instead of exiting early.
while ( $row = mysqli_fetch_assoc ( $res ) ) {
    // $a values collected for this biblio only; reset for every row.
    $arr = array ( ) ;
    // returns a File_MARCXML object from an XML string
    $journals = new File_MARCXML (
        $row [ 'marcxml' ], File_MARC::SOURCE_STRING ) ;
    // decodes the next record and returns a File_MARC_Record object
    $record = $journals -> next ( ) ;
    // next() returns false when there is no record to decode; skip such rows
    // rather than calling getFields() on a non-object.
    if ( ! $record ) {
        continue ;
    }
    // getFields() returns an array containing all File_MARC_Data_Field
    // objects that match the specified tag name
    foreach ( $record -> getFields ( '610' ) as $datafield ) {
        // Only fields with first indicator '0' and a blank second indicator
        // are considered.
        if ( ( $datafield -> getIndicator ( 1 ) == '0' ) &&
             ( $datafield -> getIndicator ( 2 ) == ' ' ) ) {
            // Collect EVERY $a subfield of the field, not just the first
            // subfield: this is what makes a repeated $a inside a single
            // 610 detectable, and keeps other subfield codes out of the count.
            foreach ( $datafield -> getSubfields ( ) as $subfield ) {
                if ( $subfield -> getCode ( ) === 'a' ) {
                    $arr [ ] = $subfield -> getData ( ) ;
                }
            }
        }
    }
    // Now that the foreach has completed, we can check for values > 1.
    // This allows us to detect dupes like:
    // 610 0 _aValue
    //       _aValue
    //
    // and also:
    //
    // 610 0 _aValue
    // 610 0 _aOtherValue
    // 610 0 _aValue
    foreach ( array_count_values ( $arr ) as $key => $value ) {
        if ( $value > 1 ) {
            // The value is catalogue data printed into an HTML page, so
            // escape it; array_count_values() may turn numeric strings
            // into integer keys, hence the cast.
            echo 'biblio ' . $row [ 'biblionumber' ]
                . " field 610: $value occurrences of "
                . htmlspecialchars ( (string) $key, ENT_QUOTES, 'UTF-8' )
                . "\n" ;
        }
    }
}
// Release the database connection and close the <PRE> element.
mysqli_close ( $conn ) ;
echo "</PRE>\n" ;
// Report the elapsed wall-clock time since $time_start was taken.
$elapsed = microtime ( true ) - $time_start ;
printf ( "\nProcess time: %.3f seconds\n", $elapsed ) ;
?>