Message ID | 20240208134322.3936320-1-tobias.hagelborn@axis.com |
---|---|
State | New |
Headers | show |
Series | hashserv: Postgres adaptations for ignoring duplicate inserts | expand |
On Thu, Feb 8, 2024 at 6:43 AM Tobias Hagelborn <tobias.hagelborn@axis.com> wrote: > > From: Tobias Hagelborn <tobiasha@axis.com> > > Hash Equivalence server performs unconditional insert also of duplicate > hash entries. This causes excessive error log entries in Postgres. > Rather ignore the duplicate inserts. > > The alternate behavior should be isolated to the postgres > engine type. Thanks. General approach seems fine, but we need to make sure the function return codes are correct still, see below. > > Signed-off-by: Tobias Hagelborn <tobias.hagelborn@axis.com> > --- > > I have only had the opportunity to validate against Postgres DB. > > lib/hashserv/sqlalchemy.py | 33 +++++++++++++++++++++++++++------ > 1 file changed, 27 insertions(+), 6 deletions(-) > > diff --git a/lib/hashserv/sqlalchemy.py b/lib/hashserv/sqlalchemy.py > index cee04bff..af9da80d 100644 > --- a/lib/hashserv/sqlalchemy.py > +++ b/lib/hashserv/sqlalchemy.py > @@ -32,6 +32,7 @@ from sqlalchemy import ( > import sqlalchemy.engine > from sqlalchemy.orm import declarative_base > from sqlalchemy.exc import IntegrityError > +from sqlalchemy.dialects.postgresql import insert as postgres_insert > > Base = declarative_base() > > @@ -287,11 +288,23 @@ class Database(object): > return result.rowcount > > async def insert_unihash(self, method, taskhash, unihash): > - statement = insert(UnihashesV2).values( > - method=method, > - taskhash=taskhash, > - unihash=unihash, > - ) > + # Postgres specific ignore on insert duplicate > + if self.engine.name == 'postgresql': > + statement = postgres_insert(UnihashesV2).values( > + method=method, > + taskhash=taskhash, > + unihash=unihash, > + ) > + statement = statement.on_conflict_do_nothing( > + index_elements=("method", "taskhash") > + ) > + else: > + statement = insert(UnihashesV2).values( > + method=method, > + taskhash=taskhash, > + unihash=unihash, > + ) > + Do you need to check the rowcount from the execution result in order to know if you should return True of False in the function? I didn't bother doing that before because it would raise an IntergrityError, but with the "do nothing" I'm not sure that will happen anymore. > self.logger.debug("%s", statement) > try: > async with self.db.begin(): > @@ -311,7 +324,15 @@ class Database(object): > if "created" in data and not isinstance(data["created"], datetime): > data["created"] = datetime.fromisoformat(data["created"]) > > - statement = insert(OuthashesV2).values(**data) > + # Postgres specific ignore on insert duplicate > + if self.engine.name == 'postgresql': > + statement = postgres_insert(OuthashesV2).values(**data) > + statement = statement.on_conflict_do_nothing( > + index_elements=("method", "taskhash", "outhash") > + ) > + else: > + statement = insert(OuthashesV2).values(**data) > + > self.logger.debug("%s", statement) > try: > async with self.db.begin(): Same here > -- > 2.30.2 > > > -=-=-=-=-=-=-=-=-=-=-=- > Links: You receive all messages sent to this group. > View/Reply Online (#15842): https://lists.openembedded.org/g/bitbake-devel/message/15842 > Mute This Topic: https://lists.openembedded.org/mt/104238536/3616693 > Group Owner: bitbake-devel+owner@lists.openembedded.org > Unsubscribe: https://lists.openembedded.org/g/bitbake-devel/unsub [JPEWhacker@gmail.com] > -=-=-=-=-=-=-=-=-=-=-=- >
diff --git a/lib/hashserv/sqlalchemy.py b/lib/hashserv/sqlalchemy.py index cee04bff..af9da80d 100644 --- a/lib/hashserv/sqlalchemy.py +++ b/lib/hashserv/sqlalchemy.py @@ -32,6 +32,7 @@ from sqlalchemy import ( import sqlalchemy.engine from sqlalchemy.orm import declarative_base from sqlalchemy.exc import IntegrityError +from sqlalchemy.dialects.postgresql import insert as postgres_insert Base = declarative_base() @@ -287,11 +288,23 @@ class Database(object): return result.rowcount async def insert_unihash(self, method, taskhash, unihash): - statement = insert(UnihashesV2).values( - method=method, - taskhash=taskhash, - unihash=unihash, - ) + # Postgres specific ignore on insert duplicate + if self.engine.name == 'postgresql': + statement = postgres_insert(UnihashesV2).values( + method=method, + taskhash=taskhash, + unihash=unihash, + ) + statement = statement.on_conflict_do_nothing( + index_elements=("method", "taskhash") + ) + else: + statement = insert(UnihashesV2).values( + method=method, + taskhash=taskhash, + unihash=unihash, + ) + self.logger.debug("%s", statement) try: async with self.db.begin(): @@ -311,7 +324,15 @@ class Database(object): if "created" in data and not isinstance(data["created"], datetime): data["created"] = datetime.fromisoformat(data["created"]) - statement = insert(OuthashesV2).values(**data) + # Postgres specific ignore on insert duplicate + if self.engine.name == 'postgresql': + statement = postgres_insert(OuthashesV2).values(**data) + statement = statement.on_conflict_do_nothing( + index_elements=("method", "taskhash", "outhash") + ) + else: + statement = insert(OuthashesV2).values(**data) + self.logger.debug("%s", statement) try: async with self.db.begin():