查看原文
其他

在Driver中调用I/O API的时候你考虑到了吗?

2018-03-09 FaEry 看雪学院

去年学习了之后就给忘了,现在又花了半天时间熟悉了这块的知识,防止自己再忘记先记录下来警醒自己。


本文主题在于指出在驱动中调用I/O函数时存在的问题,轻者卡死,重者BSOD。



1. 前言

     

驱动中我们经常使用一些I/O函数来查询文件、设备的信息,比如用IoQueryFileDosDeviceName获取文件对象的DOS路径(常用来取进程映像的DOS路径),用IoVolumeDeviceToDosName获取卷的DOS名称等等。使用这些函数的场景一般无外乎LoadImage回调、CreateProcess回调、微过滤器(Minifilter)/传统文件过滤器注册的callback,甚至会在一些内核Hook点中调用。如果不做线上产品、用户不多,很多问题不会被察觉,自己测试的时候基本上也无法复现。但是用户一多,就可能遇到各种奇奇怪怪的反馈,那么到底哪里容易出问题呢?



2. I/O API的特殊性

   

Windows中的I/O管理器提供的API大多数都是异步完成的,而其内部泛滥地使用APC,导致很多I/O函数对使用场景有很高的要求,例如:

    

    IoVolumeDeviceToDosName

       Starting with Windows Vista, you must ensure that APCs are not disabled before calling this routine. The KeAreAllApcsDisabled routine can be used to verify that APCs are not disabled.

   

意思很明确,就是说这个API内部需要用到KernelApc,在调用时得确保当前线程的APCs可以执行,可以用 KeAreAllApcsDisabled这个API做判断。

   

你无法预知当前的代码执行时的环境是怎么样的,比如说在LoadImage回调,你无法确保当前的IRQL一定是PASSIVE_LEVEL,或者没有在一些内核的临界区范围内,说到内核的临界区,现在常用的两种:


KeEnterCriticalRegion与KeEnterGuardedRegion。



我们看到在调用KeEnterCriticalRegion后当前线程的KernelApcDisable是非零值,而 调用 KeEnterGuardedRegion之后 当前线程的SpecialApcDisable是非零值 
这两个是什么玩意呢??


我们接着来看Apc分发的时候如何使用这两个值的


/*
 * KiDeliverApc (ReactOS) - delivers the APCs queued to the current thread.
 *
 * The kernel-mode APC list is drained head-first. An entry without a
 * NormalRoutine is a special kernel APC and only has its KernelRoutine
 * called. An entry with a NormalRoutine is a normal kernel APC: its
 * KernelRoutine is called, then its NormalRoutine at PASSIVE_LEVEL.
 * Afterwards, if DeliveryMode is UserMode and a user APC is pending,
 * one user-mode APC is dequeued and handed to KiInitializeUserApc.
 *
 * DeliveryMode   - UserMode enables the user APC step at the end.
 * ExceptionFrame - forwarded to KiInitializeUserApc.
 * TrapFrame      - installed as Thread->TrapFrame for the duration of the
 *                  call and forwarded to KiInitializeUserApc.
 *
 * The routine asserts that it is entered at APC_LEVEL.
 */
VOID
NTAPI
KiDeliverApc(IN KPROCESSOR_MODE DeliveryMode,
             IN PKEXCEPTION_FRAME ExceptionFrame,
             IN PKTRAP_FRAME TrapFrame)
{
    PKTHREAD Thread = KeGetCurrentThread();
    PKPROCESS Process = Thread->ApcState.Process;
    PKTRAP_FRAME OldTrapFrame;
    PLIST_ENTRY ApcListEntry;
    PKAPC Apc;
    KLOCK_QUEUE_HANDLE ApcLock;
    PKKERNEL_ROUTINE KernelRoutine;
    PVOID NormalContext;
    PKNORMAL_ROUTINE NormalRoutine;
    PVOID SystemArgument1;
    PVOID SystemArgument2;
    ASSERT_IRQL_EQUAL(APC_LEVEL);

    /* Save the old trap frame and set current one */
    OldTrapFrame = Thread->TrapFrame;
    Thread->TrapFrame = TrapFrame;

    /* Clear Kernel APC Pending */
    Thread->ApcState.KernelApcPending = FALSE;

    /* Check if Special APCs are disabled.
     * This is the master switch: while SpecialApcDisable is non-zero,
     * none of this thread's APCs are delivered by this routine. */
    if (Thread->SpecialApcDisable) goto Quickie;

    /* Do the Kernel APCs first */
    while (!IsListEmpty(&Thread->ApcState.ApcListHead[KernelMode]))
    {
        /* Lock the APC Queue */
        KiAcquireApcLockAtApcLevel(Thread, &ApcLock);

        /* Check if the list became empty now */
        if (IsListEmpty(&Thread->ApcState.ApcListHead[KernelMode]))
        {
            /* It is, release the lock and break out */
            KiReleaseApcLock(&ApcLock);
            break;
        }

        /* Kernel APC is not pending anymore */
        Thread->ApcState.KernelApcPending = FALSE;

        /* Get the next Entry */
        ApcListEntry = Thread->ApcState.ApcListHead[KernelMode].Flink;
        Apc = CONTAINING_RECORD(ApcListEntry, KAPC, ApcListEntry);

        /* Save Parameters so that it's safe to free the Object in the Kernel Routine */
        NormalRoutine = Apc->NormalRoutine;
        KernelRoutine = Apc->KernelRoutine;
        NormalContext = Apc->NormalContext;
        SystemArgument1 = Apc->SystemArgument1;
        SystemArgument2 = Apc->SystemArgument2;

        /* Special APC */
        if (!NormalRoutine)
        {
            /* Remove the APC from the list */
            RemoveEntryList(ApcListEntry);
            Apc->Inserted = FALSE;

            /* Release the APC lock */
            KiReleaseApcLock(&ApcLock);

            /* Call the Special APC */
            KernelRoutine(Apc,
                          &NormalRoutine,
                          &NormalContext,
                          &SystemArgument1,
                          &SystemArgument2);

            /* Make sure it returned correctly */
            if (KeGetCurrentIrql() != ApcLock.OldIrql)
            {
                KeBugCheckEx(IRQL_UNEXPECTED_VALUE,
                             (KeGetCurrentIrql() << 16) |
                             (ApcLock.OldIrql << 8),
                             (ULONG_PTR)KernelRoutine,
                             (ULONG_PTR)Apc,
                             (ULONG_PTR)NormalRoutine);
            }
        }
        else
        {
            /* Normal Kernel APC, make sure it's safe to deliver.
             * KernelApcDisable is the sub-switch: it only gates normal
             * kernel APCs, special kernel APCs were handled above. */
            if ((Thread->ApcState.KernelApcInProgress) ||
                (Thread->KernelApcDisable))
            {
                /* Release lock and return */
                KiReleaseApcLock(&ApcLock);
                goto Quickie;
            }

            /* Dequeue the APC */
            RemoveEntryList(ApcListEntry);
            Apc->Inserted = FALSE;

            /* Go back to APC_LEVEL */
            KiReleaseApcLock(&ApcLock);

            /* Call the Kernel APC */
            KernelRoutine(Apc,
                          &NormalRoutine,
                          &NormalContext,
                          &SystemArgument1,
                          &SystemArgument2);

            /* Make sure it returned correctly */
            if (KeGetCurrentIrql() != ApcLock.OldIrql)
            {
                KeBugCheckEx(IRQL_UNEXPECTED_VALUE,
                             (KeGetCurrentIrql() << 16) |
                             (ApcLock.OldIrql << 8),
                             (ULONG_PTR)KernelRoutine,
                             (ULONG_PTR)Apc,
                             (ULONG_PTR)NormalRoutine);
            }

            /* Check if there still is a Normal Routine */
            if (NormalRoutine)
            {
                /* At Passive Level, an APC can be prempted by a Special APC */
                Thread->ApcState.KernelApcInProgress = TRUE;
                KeLowerIrql(PASSIVE_LEVEL);

                /* Call and Raise IRQL back to APC_LEVEL */
                NormalRoutine(NormalContext, SystemArgument1, SystemArgument2);
                KeRaiseIrql(APC_LEVEL, &ApcLock.OldIrql);
            }

            /* Set Kernel APC in progress to false and loop again */
            Thread->ApcState.KernelApcInProgress = FALSE;
        }
    }

    /* Now we do the User APCs */
    if ((DeliveryMode == UserMode) &&
        !(IsListEmpty(&Thread->ApcState.ApcListHead[UserMode])) &&
         (Thread->ApcState.UserApcPending))
    {
        /* Lock the APC Queue */
        KiAcquireApcLockAtApcLevel(Thread, &ApcLock);

        /* It's not pending anymore */
        Thread->ApcState.UserApcPending = FALSE;

        /* Check if the list became empty now */
        if (IsListEmpty(&Thread->ApcState.ApcListHead[UserMode]))
        {
            /* It is, release the lock and break out */
            KiReleaseApcLock(&ApcLock);
            goto Quickie;
        }

        /* Get the actual APC object */
        ApcListEntry = Thread->ApcState.ApcListHead[UserMode].Flink;
        Apc = CONTAINING_RECORD(ApcListEntry, KAPC, ApcListEntry);

        /* Save Parameters so that it's safe to free the Object in the Kernel Routine */
        NormalRoutine = Apc->NormalRoutine;
        KernelRoutine = Apc->KernelRoutine;
        NormalContext = Apc->NormalContext;
        SystemArgument1 = Apc->SystemArgument1;
        SystemArgument2 = Apc->SystemArgument2;

        /* Remove the APC from Queue, and release the lock */
        RemoveEntryList(ApcListEntry);
        Apc->Inserted = FALSE;
        KiReleaseApcLock(&ApcLock);

        /* Call the kernel routine */
        KernelRoutine(Apc,
                      &NormalRoutine,
                      &NormalContext,
                      &SystemArgument1,
                      &SystemArgument2);

        /* Check if there's no normal routine */
        if (!NormalRoutine)
        {
            /* Check if more User APCs are Pending */
            KeTestAlertThread(UserMode);
        }
        else
        {
            /* Set up the Trap Frame and prepare for Execution in NTDLL.DLL */
            KiInitializeUserApc(ExceptionFrame,
                                TrapFrame,
                                NormalRoutine,
                                NormalContext,
                                SystemArgument1,
                                SystemArgument2);
        }
    }

Quickie:
    /* Make sure we're still in the same process */
    if (Process != Thread->ApcState.Process)
    {
        /* Erm, we got attached or something! BAD! */
        KeBugCheckEx(INVALID_PROCESS_ATTACH_ATTEMPT,
                     (ULONG_PTR)Process,
                     (ULONG_PTR)Thread->ApcState.Process,
                     Thread->ApcStateIndex,
                     KeGetCurrentPrcb()->DpcRoutineActive);
    }

    /* Restore the trap frame */
    Thread->TrapFrame = OldTrapFrame;
}


An asynchronous procedure call (APC) is a function that executes asynchronously. APCs are similar to deferred procedure calls (DPCs), but unlike DPCs, APCs execute within the context of a particular thread. Drivers (other than file systems and file-system filter drivers) do not use APCs directly, but other parts of the operating system do, so you need to be aware of how APCs work.

The Windows operating system uses three kinds of APCs:


1. User APCs run strictly in user mode and only when the current thread is in an alertable wait state. The operating system uses user APCs to implement mechanisms such as overlapped I/O and the QueueUserApc Win32 routine.


2. Normal kernel APCs run in kernel mode at IRQL = PASSIVE_LEVEL. A normal kernel APC preempts all user-mode code, including user APCs. Normal kernel APCs are generally used by file systems and file-system filter drivers.


3. Special kernel APCs run in kernel mode at IRQL = APC_LEVEL. A special kernel APC preempts user-mode code and kernel-mode code that executes at IRQL = PASSIVE_LEVEL, including both user APCs and normal kernel APCs. The operating system uses special kernel APCs to handle operations such as I/O request completion.


其实总结一下很简单,一个用户模式的Apc,一个内核模式的Apc(分为NormalRoutine为NULL的SpecialKernelApc和不为NULL的NormalKernelApc)


区别在于, SpecialKernelApc只执行KernelRoutine, IRQL为APC_LEVEL,而 NormalKernelApc不仅仅执行 KernelRoutine还执行 NormalRoutine,在PASSIVE_LEVEL下执行NormalRoutine。


但是I/O API的文档并不总会说明它内部用的是哪种类型的Kernel APC;要稳妥一点,就判断总开关SpecialApcDisable。不过一般MSDN都会说明。



3. KeAreApcsDisabled/KeAreAllApcsDisabled


/*
 * Reports whether the current thread has APC delivery disabled in any form.
 * Returns TRUE when the thread's CombinedApcDisable field is non-zero,
 * FALSE otherwise.
 */
BOOLEAN
NTAPI
KeAreApcsDisabled(VOID)
{
    /* A non-zero combined value means at least one disable count is set */
    if (KeGetCurrentThread()->CombinedApcDisable != 0)
    {
        return TRUE;
    }

    return FALSE;
}

 

/*
 * Reports whether every kind of APC is currently blocked for the caller.
 * Returns TRUE when the current thread's SpecialApcDisable is non-zero or
 * the current IRQL is APC_LEVEL or higher, FALSE otherwise.
 */
BOOLEAN
NTAPI
KeAreAllApcsDisabled(VOID)
{
    /* The thread-level special APC disable count is checked first */
    if (KeGetCurrentThread()->SpecialApcDisable)
    {
        return TRUE;
    }

    /* Otherwise the answer depends on the current IRQL alone */
    if (KeGetCurrentIrql() >= APC_LEVEL)
    {
        return TRUE;
    }

    return FALSE;
}

 

/*
 * Excerpt of the kernel thread structure showing only the APC-disable
 * fields; the "......" lines stand for members left out of this excerpt.
 */
typedef struct _KTHREAD

{

......

    /* The two SHORT fields and the ULONG share the same storage */
    union

    {

        struct

        {

            /* Non-zero stops normal kernel APCs from being delivered */
            SHORT KernelApcDisable;

            /* Non-zero stops all APC delivery for the thread */
            SHORT SpecialApcDisable;

        };

        /* Overlays both fields above, so one read tests them together */
        ULONG CombinedApcDisable;

    };

......

};


主要看这两个API有什么区别,可能很多人看不出来什么区别。。。

这一点我也理解了很久。KeAreApcsDisabled的含义是:只要当前线程处于内核临界区内就算Disable状态,也就是子开关KernelApcDisable和总开关SpecialApcDisable中至少有一个非零。如果非零的是SpecialApcDisable,那么所有APC都处于无效状态(与KeAreAllApcsDisabled的判断相同);如果非零的是KernelApcDisable,则只是Normal KernelApc失效。用KeEnterCriticalRegion进入的临界区只能用这个函数检查,一般用它就可以了;当然最好再加上IRQL判断:if ( KeAreApcsDisabled() || __readcr8() == APC_LEVEL )(__readcr8只在x64上可用,通用的写法是KeGetCurrentIrql())。


而KeAreAllApcsDisabled则是真正意义上的所有APC都无效,但是对于KeEnterCriticalRegion进入的临界区,这个API是无法判断的。



4. 解决办法

    

当无法在当前环境中直接调用I/O API时,建议把调用放到工作线程(worker thread,即系统工作队列的work item)中执行:这个线程的执行环境比较稳定,而且即使同步等待结果也不会耗时很久。例如:


/*
 * Request block passed to a system worker thread.
 * The caller fills in the three parameters, queues WorkQueueItem and waits
 * on CompleteEvent; the worker stores its result in bStatus.
 */
struct tag_FyWorkQueueItem
{
    WORK_QUEUE_ITEM WorkQueueItem;  /* item handed to ExQueueWorkItem */
    PVOID           lpParameter1;   /* first argument for the worker */
    PVOID           lpParameter2;   /* second argument for the worker */
    PVOID           lpParameter3;   /* third argument for the worker */
    KEVENT          CompleteEvent;  /* set by the worker when it is done */
    BOOL            bStatus;        /* result reported by the worker */
};

typedef struct tag_FyWorkQueueItem FyWorkQueueItem;
typedef struct tag_FyWorkQueueItem *PFyWorkQueueItem;

 

/*
 * Queries the image file name (ProcessImageFileName) of a process.
 *
 * ProcessId - id of the process to query.
 *
 * Returns a NonPagedPool buffer (tag 'hiti') that starts with a
 * UNICODE_STRING filled in by ZwQueryInformationProcess, or NULL on any
 * failure or when called above APC_LEVEL. The caller owns the buffer and
 * releases it with ExFreePool.
 *
 * NOTE(review): the Zw* routines used here are documented for
 * PASSIVE_LEVEL; the <= APC_LEVEL gate is kept from the original - confirm
 * whether it should be tightened.
 */
PUNICODE_STRING QueryProcessObjectName(IN HANDLE ProcessId)
{
    NTSTATUS Status = STATUS_SUCCESS;
    PEPROCESS EProcess = NULL;
    HANDLE hProcess = NULL;
    ULONG ulRealSize = 0;
    ULONG ulBufferSize = 0;
    PUNICODE_STRING lpuniImageFileName = NULL;
    BOOL bSuccess = FALSE;

    if (KeGetCurrentIrql() <= APC_LEVEL)
    {
        Status = PsLookupProcessByProcessId(ProcessId, &EProcess);
        if (NT_SUCCESS(Status) && EProcess)
        {
            Status = ObOpenObjectByPointer((PVOID)EProcess, OBJ_KERNEL_HANDLE, NULL,
                PROCESS_ALL_ACCESS, NULL, KernelMode, &hProcess);

            if (NT_SUCCESS(Status))
            {
                /* First call with no buffer only asks for the required size */
                Status = ZwQueryInformationProcess(hProcess, ProcessImageFileName, NULL, 0, &ulRealSize);
                if (Status == STATUS_INFO_LENGTH_MISMATCH)
                {
                    /* Reported size plus slack of one UNICODE_STRING header */
                    ulBufferSize = ulRealSize + sizeof(UNICODE_STRING);
                    lpuniImageFileName = (PUNICODE_STRING)ExAllocatePoolWithTag(NonPagedPool,
                        ulBufferSize, 'hiti');

                    if (lpuniImageFileName)
                    {
                        memset(lpuniImageFileName, 0, ulBufferSize);
                        Status = ZwQueryInformationProcess(hProcess, ProcessImageFileName,
                            lpuniImageFileName, ulBufferSize, &ulRealSize);

                        if (NT_SUCCESS(Status))
                        {
                            bSuccess = TRUE;
                        }
                    }
                }
                ZwClose(hProcess);
            }
            ObDereferenceObject(EProcess);
        }
    }

    /*
     * Only free when something was actually allocated. Most failure paths
     * get here with a NULL pointer, and ExFreePool is not documented as
     * accepting NULL.
     */
    if (!bSuccess && lpuniImageFileName)
    {
        ExFreePool(lpuniImageFileName);
        lpuniImageFileName = NULL;
    }

    return lpuniImageFileName;
}

 

/*
 * Resolves the DOS path of a process image file.
 *
 * The process image name is obtained through QueryProcessObjectName, the
 * file is opened with IoCreateFile, and IoQueryFileDosDeviceName converts
 * the file object to a DOS name, which is copied to the caller's buffer.
 *
 * ProcessId         - id of the process to query.
 * lpwzImageFileName - output buffer; on success it holds the NUL-terminated
 *                     path, on failure it is left untouched.
 * uMaxSize          - capacity of lpwzImageFileName in WCHARs, including
 *                     the terminator.
 *
 * Returns TRUE on success. Returns FALSE when called above PASSIVE_LEVEL,
 * when the output buffer is NULL or empty, when any step fails, or when
 * the path plus terminator does not fit in the buffer.
 */
BOOL GetProcessImageFileName(
    IN HANDLE ProcessId,
    OUT WCHAR* lpwzImageFileName,
    IN ULONG uMaxSize)
{
    NTSTATUS                   Status = STATUS_SUCCESS;
    HANDLE                     FileHandle = NULL;
    IO_STATUS_BLOCK            IoStatusBlock = { 0 };
    PUNICODE_STRING            lpuniProcessObjectName = NULL;
    OBJECT_ATTRIBUTES          oa = { 0 };
    PFILE_OBJECT               FileObject = NULL;
    POBJECT_NAME_INFORMATION   ObjectNameInformation = NULL;
    BOOL                       bStatus = FALSE;

    /* Nothing can be returned without a usable output buffer */
    if (!lpwzImageFileName || uMaxSize == 0) {
        return FALSE;
    }

    if (KeGetCurrentIrql() > PASSIVE_LEVEL) {
        return FALSE;
    }

    lpuniProcessObjectName = QueryProcessObjectName(ProcessId);
    if (!lpuniProcessObjectName) {
        return FALSE;
    }

    InitializeObjectAttributes(&oa, lpuniProcessObjectName, OBJ_KERNEL_HANDLE | OBJ_CASE_INSENSITIVE, NULL, NULL);
    Status = IoCreateFile(
        &FileHandle,
        FILE_READ_ATTRIBUTES,
        &oa,
        &IoStatusBlock,
        NULL,
        FILE_ATTRIBUTE_NORMAL,
        FILE_SHARE_READ | FILE_SHARE_WRITE,
        FILE_OPEN,
        FILE_NON_DIRECTORY_FILE,
        NULL,
        0,
        CreateFileTypeNone,
        NULL,
        IO_NO_PARAMETER_CHECKING);

    if (!NT_SUCCESS(Status))
    {
        ExFreePool(lpuniProcessObjectName);
        return FALSE;
    }

    Status = ObReferenceObjectByHandle(FileHandle, FILE_ANY_ACCESS, *IoFileObjectType,
        KernelMode, (PVOID*)&FileObject, NULL);

    if (NT_SUCCESS(Status) && FileObject)
    {
        Status = IoQueryFileDosDeviceName(FileObject, &ObjectNameInformation);
        if (NT_SUCCESS(Status) && ObjectNameInformation)
        {
            /*
             * Name.Length is in bytes and excludes the terminator, so the
             * character count must be strictly below the capacity to leave
             * room for the trailing NUL.
             */
            if (ObjectNameInformation->Name.Length / sizeof(WCHAR) < uMaxSize)
            {
                /* Zero-fill first so the copied path ends up NUL-terminated */
                memset(lpwzImageFileName, 0, sizeof(WCHAR) * uMaxSize);
                memcpy(lpwzImageFileName, ObjectNameInformation->Name.Buffer,
                    ObjectNameInformation->Name.Length);

                bStatus = TRUE;
            }

            /* The name buffer was allocated by IoQueryFileDosDeviceName */
            ExFreePool(ObjectNameInformation);
            ObjectNameInformation = NULL;
        }
        ObDereferenceObject(FileObject);
    }

    ObCloseHandle(FileHandle, KernelMode);
    FileHandle = NULL;
    ExFreePool(lpuniProcessObjectName);

    return bStatus;
}

 

/*
 * Work-item routine: unpacks the three parameters of the request, runs
 * GetProcessImageFileName with them, stores the result in bStatus and then
 * signals CompleteEvent.
 *
 * lpFyWorkQueueItem - request block; lpParameter1 is the process id,
 *                     lpParameter2 the output WCHAR buffer and lpParameter3
 *                     the buffer capacity stored as a pointer-sized value.
 */
VOID QueryProcessFileNameWorkItem(IN PFyWorkQueueItem lpFyWorkQueueItem)
{
    /*
     * Go through ULONG_PTR before narrowing to ULONG: a direct
     * pointer-to-ULONG cast is a truncation warning on 64-bit builds.
     */
    lpFyWorkQueueItem->bStatus = GetProcessImageFileName(
        (HANDLE)lpFyWorkQueueItem->lpParameter1,
        (WCHAR*)lpFyWorkQueueItem->lpParameter2,
        (ULONG)(ULONG_PTR)lpFyWorkQueueItem->lpParameter3);

    /* Signal only after bStatus is written, since the waiter reads it next */
    KeSetEvent(&lpFyWorkQueueItem->CompleteEvent, IO_NO_INCREMENT, FALSE);
}

 

/*
 * Resolves the DOS path of a process image, choosing where to run the query.
 *
 * When the caller is at PASSIVE_LEVEL with APCs enabled, the query runs
 * inline. When APCs are disabled or the caller is at APC_LEVEL, the query
 * is queued to the delayed work queue and this routine waits for it to
 * complete. Above APC_LEVEL nothing is attempted.
 *
 * ProcessId         - id of the process to query.
 * lpwzImageFileName - output buffer for the path.
 * uMaxSize          - capacity of lpwzImageFileName in WCHARs.
 *
 * Returns the result of GetProcessImageFileName, or FALSE above APC_LEVEL.
 */
BOOL GetProcessImageFileNameSafeIrql(
    IN HANDLE ProcessId,
    OUT WCHAR* lpwzImageFileName,
    IN ULONG uMaxSize)
{
    BOOL            bStatus = FALSE;
    FyWorkQueueItem WorkItem;

    /* Neither the inline path nor the wait below is allowed above APC_LEVEL */
    if (KeGetCurrentIrql() > APC_LEVEL)
    {
        return FALSE;
    }

    /* APCs can be delivered here, so the I/O routines may be called directly */
    if (!KeAreApcsDisabled() && KeGetCurrentIrql() != APC_LEVEL)
    {
        return GetProcessImageFileName(ProcessId, lpwzImageFileName, uMaxSize);
    }

    memset(&WorkItem, 0, sizeof(WorkItem));
    KeInitializeEvent(&WorkItem.CompleteEvent, NotificationEvent, FALSE);

    WorkItem.bStatus = FALSE;
    WorkItem.WorkQueueItem.List.Flink = NULL;
    WorkItem.WorkQueueItem.WorkerRoutine = (PWORKER_THREAD_ROUTINE)QueryProcessFileNameWorkItem;
    WorkItem.lpParameter1 = (PVOID)ProcessId;
    WorkItem.lpParameter2 = (PVOID)lpwzImageFileName;
    /* Widen through ULONG_PTR so the integer-to-pointer cast is size-correct */
    WorkItem.lpParameter3 = (PVOID)(ULONG_PTR)uMaxSize;
    WorkItem.WorkQueueItem.Parameter = &WorkItem;
    ExQueueWorkItem(&WorkItem.WorkQueueItem, DelayedWorkQueue);

    /*
     * WorkItem lives on this stack frame, so this routine must not return
     * before the worker has signalled completion.
     */
    KeWaitForSingleObject(&WorkItem.CompleteEvent, Executive, KernelMode, FALSE, NULL);
    bStatus = WorkItem.bStatus;

    return bStatus;
}





本文由看雪论坛 FaEry  原创

转载请注明来自看雪社区


热门阅读



点击阅读原文/read,

更多干货等着你~

您可能也对以下帖子感兴趣

文章有问题?点此查看未经处理的缓存