Skip to Content.
Sympa Menu

grouper-users - Bad Memberships - duplicate entries in grouper_memberships

Subject: Grouper Users - Open Discussion List

List archive

Bad Memberships - duplicate entries in grouper_memberships


Chronological Thread 
  • From:
  • To:
  • Subject: Bad Memberships - duplicate entries in grouper_memberships
  • Date: Fri, 5 Mar 2010 06:58:36 -0500 (EST)

Hi,

We're experiencing the same problem than the Bad Membership :
https://mail.internet2.edu/wws/arc/grouper-users/2009-11/msg00013.html

We're also using grouper 1.4.2 with MySQL:

> 1500000 entries in grouper_memberships

> 350000 duplicate entries in grouper_memberships (immediate & effective)
~ 350 groups (distinct owner_id) in duplicate

This could be really long with the findBadMemberships() as we saw in
https://spaces.internet2.edu/download/attachments/10059859/v1.4.2+Grouper+Documentation.pdf?version=1
. So we're thinking of doing this with a SQL script.

We've created one and we would like to have an advice of what could be wrong
or missing.

Regards

Christophe


Here is the script :

--drop if exists the used tables and procedures
DROP TABLE IF EXISTS TMP_SORT;
DROP TABLE IF EXISTS TMP_DBL;
DROP PROCEDURE IF EXISTS tree;
DROP PROCEDURE IF EXISTS auto_deletion;

-- sorting table used to get the first (registered) of each duplicate entry
CREATE TABLE TMP_SORT(
OWNER_ID VARCHAR(128),
MEMBER_ID VARCHAR(128),
FIELD_ID VARCHAR(128),
CREATE_TIME BIGINT(20),
PRIMARY KEY(OWNER_ID,MEMBER_ID,FIELD_ID)
);

-- insert into the table the first registered
INSERT INTO TMP_SORT (
SELECT OWNER_ID, MEMBER_ID, FIELD_ID, min(CREATE_TIME)
FROM GROUPER_MEMBERSHIPS
WHERE DEPTH='immediate'
GROUP BY OWNER_ID, MEMBER_ID, FIELD_ID
having count(*)>1
);

-- table used to store uid of memberships to delete
CREATE TABLE TMP_DBL(
ID VARCHAR(128) primary key,
DEPTH int(5)
);

-- insert into tmp_dbl the uid and depth of the second+ registered duplicate
INSERT INTO TMP_DBL (
SELECT ID, DEPTH
FROM GROUPER_MEMBERSHIPS gm
INNER JOIN TMP_SORT ts
ON ts.OWNER_ID=gm.OWNER_ID and ts.MEMBER_ID=gm.MEMBER_ID and
ts.FIELD_ID=gm.FIELD_ID
WHERE gm.CREATE_TIME<>ts.CREATE_TIME and DEPTH='immediate'
);

-- procedure creation
DELIMITER //
-- procedure tree : insert into tmp_dbl (memberships to delete) the children
of the initials duplicate
CREATE PROCEDURE tree()
BEGIN
DECLARE nbFils INT DEFAULT 1;
DECLARE currentDepth INT DEFAULT 0;
-- LOOP while the last insertion have children
WHILE nbFils > 0 DO
--go to the children
SET currentDepth = currentDepth + 1;
--insert children
INSERT INTO TMP_DBL (
SELECT gm.ID, currentDepth
FROM grouper_memberships gm
INNER JOIN TMP_DBL tb
ON gm.parent_membership=tb.ID
WHERE tb.DEPTH=(currentDepth-1)
);
--check the number of children's children
SELECT count(ID) INTO nbFils FROM TMP_DBL WHERE
DEPTH=currentDepth;
SELECT count(ID) FROM TMP_DBL WHERE DEPTH=currentDepth;
END WHILE;
END;//

-- pocedure which delete the duplicate
-- parse the duplicate membership to delete from the leave of children to the
root parents
CREATE PROCEDURE auto_deletion()
BEGIN
DECLARE maxDepth INT DEFAULT 0;
-- get the maximum depth of duplicate to delete
SELECT max(tmp_dbl.DEPTH) INTO maxDepth FROM tmp_dbl;
-- until we didn't delete the root parents
WHILE maxDepth >= 0 DO
-- display the number of duplicate to delete at this level
SELECT count(ID), DEPTH
FROM TMP_DBL
WHERE DEPTH=maxDepth
group by DEPTH;
-- delete the duplicate of this level :
DELETE FROM grouper_memberships
WHERE id IN (
SELECT id
FROM tmp_dbl
WHERE tmp_dbl.DEPTH=maxDepth
);
-- go to the previous level
SET maxDepth = maxDepth - 1;
END WHILE;
END;//

DELIMITER ;

-- creation of the deletion_tree
CALL tree();
-- call the deletion
CALL auto_deletion();

-- drop tables and procedures
DROP TABLE TMP_SORT;
DROP TABLE TMP_DBL;
DROP PROCEDURE IF EXISTS tree;
DROP PROCEDURE IF EXISTS auto_deletion;



Archive powered by MHonArc 2.6.16.

Top of Page