mirror of
https://github.com/RangeNetworks/openbts.git
synced 2025-10-22 23:32:00 +00:00
Attempt to fix a locking problem in mmCheckSipMsgs and mmCheckTimers. Tickets #1905 and #1914. Formerly these methods held the global lock while running the transaction state machine; now they hold the global lock only to generate a temporary vector of transactions to process, then release the global lock and rely on a lock in the transaction to prevent simultaneous access to the state machine.
Keep deleted TranEntrys around a while after they are deleted to try to avoid crashes: Add sDeletedTranEntrys list to hold last 100 deleted TranEntrys. Add TranDeleted CCState. Set this when transaction is being deleted. Check for this state when starting state machines.
This commit is contained in:
@@ -109,6 +109,8 @@ struct CCState {
|
||||
HandoverProgress = 104,
|
||||
HandoverOutbound = 105,
|
||||
//BusyReject, // pat removed, not used
|
||||
|
||||
TranDeleted
|
||||
};
|
||||
static const char* callStateString(CallState state);
|
||||
static bool isInCall(CallState state);
|
||||
|
@@ -268,6 +268,16 @@ void MMUser::mmuFree(MMUserMap::iterator *piter, TermCause cause) // Some caller
|
||||
delete this;
|
||||
}
|
||||
|
||||
void MMContext::mmGetTranList(TranEntryVector &tranlist)
|
||||
{
|
||||
tranlist.clear(); // Be sure.
|
||||
ScopedLock lock(gMMLock,__FILE__,__LINE__);
|
||||
for (unsigned i = TE_first; i < TE_num; i++) {
|
||||
RefCntPointer<TranEntry> tranp = mmGetTran(i);
|
||||
if (! tranp.isNULL()) { tranlist.push_back(tranp); }
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool MMContext::mmCheckSipMsgs()
|
||||
{
|
||||
@@ -277,11 +287,17 @@ bool MMContext::mmCheckSipMsgs()
|
||||
// messages during the stress test. Ticket #1905 reports a crash that looks like using a transaction here while
|
||||
// it is being deleted. Since I have separated layer2 from layer3 with InterthreadQueues since the last test,
|
||||
// I am re-enabling this global lock to attempt to fix the crash.
|
||||
ScopedLock lock(gMMLock,__FILE__,__LINE__); // I think this is unnecessary, but be safe.
|
||||
// (pat) 10-10-2014 update: Holding the global lock here appears to cause system lockups as reported in St Pierre. Not sure why.
|
||||
// So here is the revised plan: The gMMLock will be used only temporarily to protect access to the list, then we will
|
||||
// drop the global lock and attempt to invoke lockAndInvokeSipMsgs, which will be modified to check that the transaction
|
||||
// has not been deleted in the interim period - the mL3RewriteLock in the transaction prevents collisions at that transaction level.
|
||||
// ScopedLock lock(gMMLock,__FILE__,__LINE__);
|
||||
TranEntryVector tranlist;
|
||||
mmGetTranList(tranlist);
|
||||
bool result = false;
|
||||
for (unsigned i = TE_first; i < TE_num; i++) {
|
||||
RefCntPointer<TranEntry> tranp = mmGetTran(i);
|
||||
if (! tranp.isNULL()) { result |= tranp->lockAndInvokeSipMsgs(); }
|
||||
for (TranEntryVector::iterator it = tranlist.begin(); it != tranlist.end(); it++) {
|
||||
TranEntry *tranp = (*it).self();
|
||||
result |= tranp->lockAndInvokeSipMsgs();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -292,12 +308,14 @@ bool MMContext::mmCheckTimers()
|
||||
// As an interim measure, just dont lock this and hope for the best.
|
||||
//ScopedLock lock(gMMLock,__FILE__,__LINE__); // I think this is unnecessary; the channel cannot change when this is called, but be safe.
|
||||
// Check MM timers.
|
||||
TranEntryVector tranlist;
|
||||
mmGetTranList(tranlist);
|
||||
|
||||
// checkTimers locks the transaction if any timer needs servicing.
|
||||
bool result = false;
|
||||
for (unsigned i = TE_first; i < TE_num; i++) {
|
||||
RefCntPointer<TranEntry> tranp = mmGetTran(i);
|
||||
if (! tranp.isNULL()) { result |= tranp->checkTimers(); }
|
||||
for (TranEntryVector::iterator it = tranlist.begin(); it != tranlist.end(); it++) {
|
||||
TranEntry *tranp = (*it).self();
|
||||
result |= tranp->checkTimers();
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -996,7 +1014,7 @@ void MMContext::mmcFree(TermCause cause)
|
||||
for (unsigned tries = 0; tries < 3; tries++) {
|
||||
if (mmcTE[i] != NULL) { mmcTE[i]->teCancel(cause); } // Removes the transaction from mmcTE via mmDisconnectTran
|
||||
}
|
||||
assert(mmcTE[i] == NULL); // teCancel removed it.
|
||||
assert(mmcTE[i].self() == NULL); // teCancel removed it.
|
||||
}
|
||||
LOG(DEBUG)<<"MMContext DELETE "<<(void*)this;
|
||||
delete this;
|
||||
|
@@ -106,6 +106,8 @@ class MMUser : public MemCheckMMUser /*: public RefCntBase*/ {
|
||||
std::ostream& operator<<(std::ostream& os, const MMUser&mmu);
|
||||
std::ostream& operator<<(std::ostream& os, const MMUser*mmu);
|
||||
|
||||
typedef std::vector< RefCntPointer<TranEntry> > TranEntryVector;
|
||||
|
||||
// This is the set of actively runnning TranEntrys on an L3LogicalChannel.
|
||||
// TODO: The MM operations should run directly on the MMContext, not in a TranEntry.
|
||||
DEFINE_MEMORY_LEAK_DETECTOR_CLASS(MMContext,MemCheckMMContext)
|
||||
@@ -164,6 +166,7 @@ class MMContext : public MemCheckMMContext /*: public RefCntBase*/ {
|
||||
time_t mmcDuration() const { return time(NULL) - mmcOpenTime; }
|
||||
|
||||
RefCntPointer<TranEntry> mmGetTran(unsigned ati) const;
|
||||
void mmGetTranList( TranEntryVector &tranlist);
|
||||
void mmConnectTran(ActiveTranIndex ati, TranEntry *tran);
|
||||
void mmConnectTran(TranEntry *tran);
|
||||
void mmDisconnectTran(TranEntry *tran);
|
||||
|
@@ -93,6 +93,8 @@ static const char* createNewTransactionTable = {
|
||||
|
||||
|
||||
|
||||
static std::list< RefCntPointer<TranEntry> > sDeletedTranEntrys;
|
||||
|
||||
|
||||
|
||||
HandoverEntry::HandoverEntry(const TranEntry *tran) :
|
||||
@@ -646,6 +648,10 @@ CallState TranEntryProtected::getGSMState() const
|
||||
|
||||
void TranEntryProtected::setGSMState(CallState wState)
|
||||
{
|
||||
if (mGSMState == CCState::TranDeleted) {
|
||||
// Shouldnt happen but be sure: never change state from deleted to anything else.
|
||||
return;
|
||||
}
|
||||
//ScopedLock lock(mLock,__FILE__,__LINE__);
|
||||
mStateTimer.now();
|
||||
|
||||
@@ -1441,6 +1447,7 @@ bool TranEntry::lockAndStart(MachineBase *wProc, GSM::L3Message *l3msg)
|
||||
// For example if lch is FACCH, we cannot send anything downstrem on that.
|
||||
bool TranEntry::lockAndInvokeL3Msg(const GSM::L3Message *l3msg /*, const L3LogicalChannel *lch*/)
|
||||
{
|
||||
if (this->getGSMState() == CCState::TranDeleted) { return false; } // (pat) Paranoid check that TranEntry still extant.
|
||||
LOG(DEBUG);
|
||||
bool result = false;
|
||||
RefCntPointer<TranEntry> saver = this;
|
||||
@@ -1457,6 +1464,7 @@ bool TranEntry::lockAndInvokeL3Msg(const GSM::L3Message *l3msg /*, const L3Logic
|
||||
// l3msg may be NULL for primitives or unparseable messages.
|
||||
bool TranEntry::lockAndInvokeFrame(const L3Frame *frame, const L3Message *l3msg)
|
||||
{
|
||||
if (this->getGSMState() == CCState::TranDeleted) { return false; } // (pat) Paranoid check that TranEntry still extant.
|
||||
LOG(DEBUG) << l3msg;
|
||||
bool result = false;
|
||||
RefCntPointer<TranEntry> saver = this;
|
||||
@@ -1476,6 +1484,7 @@ bool TranEntry::lockAndInvokeFrame(const L3Frame *frame, const L3Message *l3msg)
|
||||
// Caller responsible for deleting the sipmsg.
|
||||
bool TranEntry::lockAndInvokeSipMsg(const SIP::DialogMessage *sipmsg)
|
||||
{
|
||||
if (this->getGSMState() == CCState::TranDeleted) { return false; } // (pat) Paranoid check that TranEntry still extant.
|
||||
bool result = false;
|
||||
RefCntPointer<TranEntry> saver = this;
|
||||
{ ScopedLock lock(mL3RewriteLock,__FILE__,__LINE__);
|
||||
@@ -1503,6 +1512,8 @@ bool TranEntry::lockAndInvokeSipMsgs()
|
||||
|
||||
bool TranEntry::lockAndInvokeTimeout(L3Timer *timer)
|
||||
{
|
||||
if (this->getGSMState() == CCState::TranDeleted) { return false; } // (pat) Paranoid check that TranEntry still extant.
|
||||
|
||||
// handleMachineStatus may unlink the transaction; this reference prevents the transaction
|
||||
// from being deleted until this routine exists. Without this, the ScopedLock tries to reference the deleted transaction.
|
||||
bool result = false;
|
||||
@@ -1620,14 +1631,24 @@ void TranEntry::teRemove(TermCause cause)
|
||||
// will not destroy itself while a transaction still points at it. Taking the transaction
|
||||
// out of the TransactionTable prevents the dialog from finding the transaction any longer.
|
||||
// However to prevent a race we must do this after using the dialog, which we did above.
|
||||
setGSMState(CCState::NullState); // redundant, transaction is being deleted.
|
||||
setGSMState(CCState::TranDeleted); // (pat 10-10-2014) Now we use this to mark the TranEntry as deleted.
|
||||
gNewTransactionTable.ttRemove(this->tranID());
|
||||
|
||||
while (currentProcedure()) {
|
||||
delete tran()->tePopMachine();
|
||||
delete this->tePopMachine();
|
||||
}
|
||||
|
||||
sDeletedTranEntrys.push_back(RefCntPointer<TranEntry>(this));
|
||||
|
||||
if (mContext) { mContext->mmDisconnectTran(this); } // DANGER: this deletes the transaction as a side effect.
|
||||
mContext = NULL;
|
||||
|
||||
while (sDeletedTranEntrys.size() > 100) {
|
||||
RefCntPointer<TranEntry> tran = sDeletedTranEntrys.front();
|
||||
sDeletedTranEntrys.pop_front();
|
||||
// The act of moving it from the list deletes the reference count and causes it to be deleted.
|
||||
LOG(DEBUG) << "Deleting transaction:"<<tran->tranID();
|
||||
}
|
||||
}
|
||||
|
||||
// Send closure messages for a transaction that is known to be a CS transaction, using the specified CC cause.
|
||||
@@ -1635,9 +1656,13 @@ void TranEntry::teRemove(TermCause cause)
|
||||
// To close all transactions on a channel, see L3LogicalChannel::chanClose()
|
||||
void TranEntry::teCloseCallNow(TermCause cause, bool sendCause)
|
||||
{
|
||||
LOG(INFO) << "SIP term info closeCallNow cause: " << cause; // SVGDBG
|
||||
//LOG(INFO) << "SIP term info closeCallNow cause: " << cause; // SVGDBG
|
||||
WATCHINFO("CloseCallNow"<<LOGVAR2("cause",cause)<<" "<<channel()->descriptiveString());
|
||||
LOG(DEBUG) <<LOGVAR(cause) << this << gMMLayer.printMMInfo();
|
||||
if (tran()->getGSMState() == CCState::TranDeleted) { // (pat) Paranoid check that TranEntry still extant.
|
||||
LOG(ERR) << "detected closeCall on deleted transaction";
|
||||
return;
|
||||
}
|
||||
|
||||
tran()->teCloseDialog(cause); // Redundant with teRemove, but I want to be sure we terminate the dialog regardless of bugs.
|
||||
if (isL3TIValid() && tran()->getGSMState() != CCState::NullState && tran()->getGSMState() != CCState::ReleaseRequest) {
|
||||
|
Reference in New Issue
Block a user