[AMF/MME] Fix crash during S1/X2 handover cancellation by validating UE context association before deassociation (#3983)

Problem:
During inter-eNB/RAN handover scenarios, such as S1/N2 handover followed by X2/Xn handover cancellation,
the UE context may end up partially moved or duplicated across multiple eNBs. If the handover
is canceled by the target eNB and followed by subsequent UE Context Release or PathSwitchRequest
procedures, the MME can crash due to inconsistent context state. Specifically, when deassociating
the mme_ue <-> enb_ue (or amf_ue <-> ran_ue) pair, the code unconditionally resets the association
fields (`mme_ue->enb_ue_id`, `enb_ue->mme_ue_id`, etc.), even if they no longer reflect an actual
association due to the earlier handover cancellation.

Root Cause:
The MME or AMF state machine incorrectly assumes that the associated context IDs are still valid
and proceeds to unlink the context. When the PathSwitchRequest arrives after the UE context has
been (partially or fully) released, either the assertion `enb_ue != NULL` fails or the stored ID
no longer matches the expected one (e.g., `mme_ue->enb_ue_id != enb_ue->id`), and the MME crashes.

Solution:
This patch introduces stricter association validation before unlinking UE contexts. Specifically:

- The one-argument unlinking functions `enb_ue_unlink()`, `sgw_ue_unlink()` and
  `amf_ue_deassociate()` were replaced with more explicit two-argument versions:
  `enb_ue_deassociate_mme_ue()`, `sgw_ue_deassociate_mme_ue()` and `amf_ue_deassociate_ran_ue()`.
  Each takes both contexts and compares the peer ID stored in the UE context (e.g.
  `mme_ue->enb_ue_id`) with the ID of the context being deassociated (e.g. `enb_ue->id`).
- If an ID mismatch is detected, the deassociation is skipped and a detailed error is logged
  (rather than crashing with an assertion).
- This approach prevents crashes during handover cancellation cases and avoids incorrectly
  cleaning up a context that is already associated with a new peer.

Additionally:
- The same pattern was applied consistently across MME and AMF modules including:
  - `s1ap-handler.c`, `mme-context.c`, `mme-s11-handler.c`, `mme-gtp-path.c`
  - `ngap-handler.c`, `nsmf-handler.c`, `sbi-path.c`
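- Field resets on the release paths (`xxx_ue->xxx_ue_id = OGS_INVALID_POOL_ID`) are now guarded
  with validation logic. Unconditional resets remain only where the owning context itself is
  being removed or put on hold (`amf_ue_remove()`, `mme_ue_remove()` and the context-holding
  macros), where they are now written inline instead of going through a helper.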
- Logging was improved to aid in debugging unexpected deassociation cases.

This change improves robustness of the MME/AMF against abnormal handover procedures and
ensures graceful handling of late context release requests or race conditions during
handover cancel and re-establishment.

Fixes: assertion failure in `sgw_ue_check_if_relocated()` during PathSwitchRequest
This commit is contained in:
Sukchan Lee
2025-07-11 22:25:23 +09:00
parent 701505102f
commit 7575a7be13
10 changed files with 60 additions and 68 deletions

View File

@@ -1773,7 +1773,7 @@ void amf_ue_remove(amf_ue_t *amf_ue)
ogs_list_count(&amf_ue->sbi.xact_list));
ogs_sbi_object_free(&amf_ue->sbi);
amf_ue_deassociate(amf_ue);
amf_ue->ran_ue_id = OGS_INVALID_POOL_ID;
ogs_pool_id_free(&amf_ue_pool, amf_ue);
@@ -2231,16 +2231,16 @@ void amf_ue_associate_ran_ue(amf_ue_t *amf_ue, ran_ue_t *ran_ue)
ran_ue->amf_ue_id = amf_ue->id;
}
void ran_ue_deassociate(ran_ue_t *ran_ue)
{
ogs_assert(ran_ue);
ran_ue->amf_ue_id = OGS_INVALID_POOL_ID;
}
void amf_ue_deassociate(amf_ue_t *amf_ue)
void amf_ue_deassociate_ran_ue(amf_ue_t *amf_ue, ran_ue_t *ran_ue)
{
ogs_assert(amf_ue);
amf_ue->ran_ue_id = OGS_INVALID_POOL_ID;
ogs_assert(ran_ue);
if (amf_ue->ran_ue_id == ran_ue->id)
amf_ue->ran_ue_id = OGS_INVALID_POOL_ID;
else
ogs_error("Cannot deassociate amf_ue->ran_ue_id[%d] != ran_ue->id[%d]",
amf_ue->ran_ue_id, ran_ue->id);
}
void source_ue_associate_target_ue(

View File

@@ -542,7 +542,7 @@ struct amf_ue_s {
\
ran_ue_holding = ran_ue_find_by_id((__aMF)->ran_ue_id); \
if (ran_ue_holding) { \
ran_ue_deassociate(ran_ue_holding); \
ran_ue_holding->amf_ue_id = OGS_INVALID_POOL_ID; \
\
ogs_warn("[%s] Holding NG Context", (__aMF)->suci); \
ogs_warn("[%s] RAN_UE_NGAP_ID[%lld] AMF_UE_NGAP_ID[%lld]", \
@@ -1014,8 +1014,7 @@ OpenAPI_rat_type_e amf_ue_rat_type(amf_ue_t *amf_ue);
* - Delete Indirect Data Forwarding Tunnel Request/Response
*/
void amf_ue_associate_ran_ue(amf_ue_t *amf_ue, ran_ue_t *ran_ue);
void ran_ue_deassociate(ran_ue_t *ran_ue);
void amf_ue_deassociate(amf_ue_t *amf_ue);
void amf_ue_deassociate_ran_ue(amf_ue_t *amf_ue, ran_ue_t *ran_ue);
void source_ue_associate_target_ue(ran_ue_t *source_ue, ran_ue_t *target_ue);
void source_ue_deassociate_target_ue(ran_ue_t *ran_ue);

View File

@@ -1755,12 +1755,6 @@ void ngap_handle_ue_context_release_action(ran_ue_t *ran_ue)
break;
case NGAP_UE_CTX_REL_NG_REMOVE_AND_UNLINK:
ogs_debug(" Action: NG normal release");
ran_ue_remove(ran_ue);
if (!amf_ue) {
ogs_error("No UE(amf-ue) Context");
return;
}
amf_ue_deassociate(amf_ue);
/*
* When AMF release the NAS signalling connection,
@@ -1789,9 +1783,14 @@ void ngap_handle_ue_context_release_action(ran_ue_t *ran_ue)
* TODO: If the UE is registered for emergency services, the AMF shall
* set the mobile reachable timer with a value equal to timer T3512.
*/
ogs_timer_start(amf_ue->mobile_reachable.timer,
ogs_time_from_sec(amf_self()->time.t3512.value + 240));
if (amf_ue) {
amf_ue_deassociate_ran_ue(amf_ue, ran_ue);
ogs_timer_start(amf_ue->mobile_reachable.timer,
ogs_time_from_sec(amf_self()->time.t3512.value + 240));
} else
ogs_error("No UE(amf-ue) Context");
ran_ue_remove(ran_ue);
break;
case NGAP_UE_CTX_REL_UE_CONTEXT_REMOVE:

View File

@@ -767,10 +767,9 @@ int amf_nsmf_pdusession_handle_update_sm_context(
} else if (state == AMF_REMOVE_S1_CONTEXT_BY_LO_CONNREFUSED) {
if (AMF_SESSION_SYNC_DONE(amf_ue, state)) {
amf_ue_deassociate(amf_ue);
if (ran_ue) {
ogs_debug(" SUPI[%s]", amf_ue->supi);
amf_ue_deassociate_ran_ue(amf_ue, ran_ue);
ran_ue_remove(ran_ue);
} else {
ogs_warn("[%s] RAN-NG Context has already been removed",
@@ -812,14 +811,13 @@ int amf_nsmf_pdusession_handle_update_sm_context(
} else if (state == AMF_REMOVE_S1_CONTEXT_BY_RESET_ALL) {
if (AMF_SESSION_SYNC_DONE(amf_ue, state)) {
amf_ue_deassociate(amf_ue);
if (ran_ue) {
amf_gnb_t *gnb = NULL;
gnb = amf_gnb_find_by_id(ran_ue->gnb_id);
ogs_debug(" SUPI[%s]", amf_ue->supi);
amf_ue_deassociate_ran_ue(amf_ue, ran_ue);
ran_ue_remove(ran_ue);
if (gnb && ogs_list_count(&gnb->ran_ue_list) == 0) {
@@ -869,14 +867,13 @@ int amf_nsmf_pdusession_handle_update_sm_context(
if (AMF_SESSION_SYNC_DONE(amf_ue, state)) {
ran_ue_t *iter = NULL;
amf_ue_deassociate(amf_ue);
if (ran_ue) {
amf_gnb_t *gnb = NULL;
gnb = amf_gnb_find_by_id(ran_ue->gnb_id);
ogs_debug(" SUPI[%s]", amf_ue->supi);
amf_ue_deassociate_ran_ue(amf_ue, ran_ue);
ran_ue_remove(ran_ue);
if (gnb) {

View File

@@ -463,8 +463,8 @@ void amf_sbi_send_deactivate_all_ue_in_gnb(amf_gnb_t *gnb, int state)
new_xact_count = amf_sess_xact_count(amf_ue);
if (old_xact_count == new_xact_count) {
amf_ue_deassociate_ran_ue(amf_ue, ran_ue);
ran_ue_remove(ran_ue);
amf_ue_deassociate(amf_ue);
}
} else {
ogs_warn("amf_sbi_send_deactivate_all_ue_in_gnb()");

View File

@@ -3692,7 +3692,7 @@ void mme_ue_remove(mme_ue_t *mme_ue)
ogs_timer_delete(mme_ue->t_implicit_detach.timer);
ogs_timer_delete(mme_ue->gn.t_gn_holding);
enb_ue_unlink(mme_ue);
mme_ue->enb_ue_id = OGS_INVALID_POOL_ID;
mme_sess_remove_all(mme_ue);
mme_session_remove_all(mme_ue);
@@ -4173,16 +4173,16 @@ void enb_ue_associate_mme_ue(enb_ue_t *enb_ue, mme_ue_t *mme_ue)
enb_ue->mme_ue_id = mme_ue->id;
}
void enb_ue_deassociate(enb_ue_t *enb_ue)
{
ogs_assert(enb_ue);
enb_ue->mme_ue_id = OGS_INVALID_POOL_ID;
}
void enb_ue_unlink(mme_ue_t *mme_ue)
void enb_ue_deassociate_mme_ue(enb_ue_t *enb_ue, mme_ue_t *mme_ue)
{
ogs_assert(mme_ue);
mme_ue->enb_ue_id = OGS_INVALID_POOL_ID;
ogs_assert(enb_ue);
if (mme_ue->enb_ue_id == enb_ue->id)
mme_ue->enb_ue_id = OGS_INVALID_POOL_ID;
else
ogs_error("Cannot deassociate mme_ue->enb_ue_id[%d] != enb_ue->id[%d]",
mme_ue->enb_ue_id, enb_ue->id);
}
void enb_ue_source_associate_target(enb_ue_t *source_ue, enb_ue_t *target_ue)
@@ -4251,16 +4251,16 @@ void sgw_ue_associate_mme_ue(sgw_ue_t *sgw_ue, mme_ue_t *mme_ue)
sgw_ue->mme_ue_id = mme_ue->id;
}
void sgw_ue_deassociate(sgw_ue_t *sgw_ue)
{
ogs_assert(sgw_ue);
sgw_ue->mme_ue_id = OGS_INVALID_POOL_ID;
}
void sgw_ue_unlink(mme_ue_t *mme_ue)
void sgw_ue_deassociate_mme_ue(sgw_ue_t *sgw_ue, mme_ue_t *mme_ue)
{
ogs_assert(mme_ue);
mme_ue->sgw_ue_id = OGS_INVALID_POOL_ID;
ogs_assert(sgw_ue);
if (mme_ue->sgw_ue_id == sgw_ue->id)
mme_ue->sgw_ue_id = OGS_INVALID_POOL_ID;
else
ogs_error("Cannot deassociate mme_ue->sgw_ue_id[%d] != sgw_ue->id[%d]",
mme_ue->sgw_ue_id, sgw_ue->id);
}
void sgw_ue_source_associate_target(sgw_ue_t *source_ue, sgw_ue_t *target_ue)

View File

@@ -634,7 +634,7 @@ struct mme_ue_s {
\
enb_ue_holding = enb_ue_find_by_id((__mME)->enb_ue_id); \
if (enb_ue_holding) { \
enb_ue_deassociate(enb_ue_holding); \
enb_ue_holding->mme_ue_id = OGS_INVALID_POOL_ID; \
\
ogs_warn("[%s] Holding S1 Context", (__mME)->imsi_bcd); \
ogs_warn("[%s] ENB_UE_S1AP_ID[%d] MME_UE_S1AP_ID[%d]", \
@@ -1163,14 +1163,12 @@ int mme_ue_xact_count(mme_ue_t *mme_ue, uint8_t org);
* - Delete Indirect Data Forwarding Tunnel Request/Response
*/
void enb_ue_associate_mme_ue(enb_ue_t *enb_ue, mme_ue_t *mme_ue);
void enb_ue_deassociate(enb_ue_t *enb_ue);
void enb_ue_unlink(mme_ue_t *mme_ue);
void enb_ue_deassociate_mme_ue(enb_ue_t *enb_ue, mme_ue_t *mme_ue);
void enb_ue_source_associate_target(enb_ue_t *source_ue, enb_ue_t *target_ue);
void enb_ue_source_deassociate_target(enb_ue_t *enb_ue);
void sgw_ue_associate_mme_ue(sgw_ue_t *sgw_ue, mme_ue_t *mme_ue);
void sgw_ue_deassociate(sgw_ue_t *sgw_ue);
void sgw_ue_unlink(mme_ue_t *mme_ue);
void sgw_ue_deassociate_mme_ue(sgw_ue_t *sgw_ue, mme_ue_t *mme_ue);
void sgw_ue_source_associate_target(sgw_ue_t *source_ue, sgw_ue_t *target_ue);
void sgw_ue_source_deassociate_target(sgw_ue_t *sgw_ue);

View File

@@ -656,7 +656,7 @@ void mme_gtp_send_release_all_ue_in_enb(mme_enb_t *enb, int action)
* Execute enb_ue_deassociate_mme_ue(enb_ue, mme_ue) and enb_ue_remove(enb_ue)
* before mme_gtp_send_release_access_bearers_request()
*/
enb_ue_unlink(mme_ue);
enb_ue_deassociate_mme_ue(enb_ue, mme_ue);
enb_ue_remove(enb_ue);
}

View File

@@ -1492,13 +1492,12 @@ void mme_s11_handle_release_access_bearers_response(
* for new UE-associated logical S1-connections over the S1 interface,
* the MME shall respond with the RESET ACKNOWLEDGE message.
*/
enb_ue_unlink(mme_ue);
if (enb_ue) {
mme_enb_t *enb = NULL;
enb = mme_enb_find_by_id(enb_ue->enb_id);
enb_ue_deassociate_mme_ue(enb_ue, mme_ue);
enb_ue_remove(enb_ue);
if (enb && ogs_list_count(&enb->enb_ue_list) == 0) {
@@ -1513,13 +1512,12 @@ void mme_s11_handle_release_access_bearers_response(
} else if (action == OGS_GTP_RELEASE_S1_CONTEXT_REMOVE_BY_RESET_PARTIAL) {
enb_ue_t *iter = NULL;
enb_ue_unlink(mme_ue);
if (enb_ue) {
mme_enb_t *enb = NULL;
enb = mme_enb_find_by_id(enb_ue->enb_id);
enb_ue_deassociate_mme_ue(enb_ue, mme_ue);
enb_ue_remove(enb_ue);
if (enb) {

View File

@@ -1961,12 +1961,13 @@ void s1ap_handle_ue_context_release_action(enb_ue_t *enb_ue)
break;
case S1AP_UE_CTX_REL_S1_REMOVE_AND_UNLINK:
ogs_debug(" Action: S1 normal release");
enb_ue_remove(enb_ue);
if (!mme_ue) {
if (mme_ue)
enb_ue_deassociate_mme_ue(enb_ue, mme_ue);
else
ogs_error("No UE(mme-ue) context");
return;
}
enb_ue_unlink(mme_ue);
enb_ue_remove(enb_ue);
break;
case S1AP_UE_CTX_REL_UE_CONTEXT_REMOVE:
ogs_debug(" Action: UE context remove");
@@ -2057,16 +2058,16 @@ void s1ap_handle_ue_context_release_action(enb_ue_t *enb_ue)
break;
case S1AP_UE_CTX_REL_S1_PAGING:
ogs_debug(" Action: S1 paging");
enb_ue_remove(enb_ue);
if (!mme_ue) {
ogs_error("No UE(mme-ue) context");
return;
}
enb_ue_unlink(mme_ue);
if (mme_ue) {
enb_ue_deassociate_mme_ue(enb_ue, mme_ue);
r = s1ap_send_paging(mme_ue, S1AP_CNDomain_ps);
ogs_expect(r == OGS_OK);
ogs_assert(r != OGS_ERROR);
r = s1ap_send_paging(mme_ue, S1AP_CNDomain_ps);
ogs_expect(r == OGS_OK);
ogs_assert(r != OGS_ERROR);
} else
ogs_error("No UE(mme-ue) context");
enb_ue_remove(enb_ue);
break;
default:
ogs_error("Invalid Action[%d]", enb_ue->ue_ctx_rel_action);