diff --git a/CREDITS b/CREDITS index bb6278884f894878dc49e1c6722a0cbe3e89e82e..c56d8aa10131d8443f7c5b717ae2b13b2a1d376b 100644 --- a/CREDITS +++ b/CREDITS @@ -1197,6 +1197,13 @@ S: R. Tocantins, 89 - Cristo Rei S: 80050-430 - Curitiba - Paraná S: Brazil +N: Oded Gabbay +E: oded.gabbay@gmail.com +D: AMD KFD maintainer +S: 12 Shraga Raphaeli +S: Petah-Tikva, 4906418 +S: Israel + N: Kumar Gala E: galak@kernel.crashing.org D: Embedded PowerPC 6xx/7xx/74xx/82xx/83xx/85xx support diff --git a/Documentation/DocBook/drm.tmpl b/Documentation/DocBook/drm.tmpl index be35bc328b775948cd10d1c2cf3f8df76256aeb2..4b592ffbafeec8053e09c0c068045505e05757ec 100644 --- a/Documentation/DocBook/drm.tmpl +++ b/Documentation/DocBook/drm.tmpl @@ -492,10 +492,10 @@ char *date; The Translation Table Manager (TTM) - TTM design background and information belongs here. + TTM design background and information belongs here. - TTM initialization + TTM initialization This section is outdated. Drivers wishing to support TTM must fill out a drm_bo_driver @@ -503,42 +503,42 @@ char *date; pointers for initializing the TTM, allocating and freeing memory, waiting for command completion and fence synchronization, and memory migration. See the radeon_ttm.c file for an example of usage. - - - The ttm_global_reference structure is made up of several fields: - - - struct ttm_global_reference { - enum ttm_global_types global_type; - size_t size; - void *object; - int (*init) (struct ttm_global_reference *); - void (*release) (struct ttm_global_reference *); - }; - - - There should be one global reference structure for your memory - manager as a whole, and there will be others for each object - created by the memory manager at runtime. Your global TTM should - have a type of TTM_GLOBAL_TTM_MEM. The size field for the global - object should be sizeof(struct ttm_mem_global), and the init and - release hooks should point at your driver-specific init and - release routines, which probably eventually call - ttm_mem_global_init and ttm_mem_global_release, respectively. - - - Once your global TTM accounting structure is set up and initialized - by calling ttm_global_item_ref() on it, - you need to create a buffer object TTM to - provide a pool for buffer object allocation by clients and the - kernel itself. The type of this object should be TTM_GLOBAL_TTM_BO, - and its size should be sizeof(struct ttm_bo_global). Again, - driver-specific init and release functions may be provided, - likely eventually calling ttm_bo_global_init() and - ttm_bo_global_release(), respectively. Also, like the previous - object, ttm_global_item_ref() is used to create an initial reference - count for the TTM, which will call your initialization function. - + + + The ttm_global_reference structure is made up of several fields: + + + struct ttm_global_reference { + enum ttm_global_types global_type; + size_t size; + void *object; + int (*init) (struct ttm_global_reference *); + void (*release) (struct ttm_global_reference *); + }; + + + There should be one global reference structure for your memory + manager as a whole, and there will be others for each object + created by the memory manager at runtime. Your global TTM should + have a type of TTM_GLOBAL_TTM_MEM. The size field for the global + object should be sizeof(struct ttm_mem_global), and the init and + release hooks should point at your driver-specific init and + release routines, which probably eventually call + ttm_mem_global_init and ttm_mem_global_release, respectively. + + + Once your global TTM accounting structure is set up and initialized + by calling ttm_global_item_ref() on it, + you need to create a buffer object TTM to + provide a pool for buffer object allocation by clients and the + kernel itself. The type of this object should be TTM_GLOBAL_TTM_BO, + and its size should be sizeof(struct ttm_bo_global). Again, + driver-specific init and release functions may be provided, + likely eventually calling ttm_bo_global_init() and + ttm_bo_global_release(), respectively. Also, like the previous + object, ttm_global_item_ref() is used to create an initial reference + count for the TTM, which will call your initialization function. + @@ -566,19 +566,19 @@ char *date; using driver-specific ioctls. - On a fundamental level, GEM involves several operations: - - Memory allocation and freeing - Command execution - Aperture management at command execution time - - Buffer object allocation is relatively straightforward and largely + On a fundamental level, GEM involves several operations: + + Memory allocation and freeing + Command execution + Aperture management at command execution time + + Buffer object allocation is relatively straightforward and largely provided by Linux's shmem layer, which provides memory to back each object. Device-specific operations, such as command execution, pinning, buffer - read & write, mapping, and domain ownership transfers are left to + read & write, mapping, and domain ownership transfers are left to driver-specific ioctls. @@ -738,16 +738,16 @@ char *date; respectively. The conversion is handled by the DRM core without any driver-specific support. - - GEM also supports buffer sharing with dma-buf file descriptors through - PRIME. GEM-based drivers must use the provided helpers functions to - implement the exporting and importing correctly. See . - Since sharing file descriptors is inherently more secure than the - easily guessable and global GEM names it is the preferred buffer - sharing mechanism. Sharing buffers through GEM names is only supported - for legacy userspace. Furthermore PRIME also allows cross-device - buffer sharing since it is based on dma-bufs. - + + GEM also supports buffer sharing with dma-buf file descriptors through + PRIME. GEM-based drivers must use the provided helpers functions to + implement the exporting and importing correctly. See . + Since sharing file descriptors is inherently more secure than the + easily guessable and global GEM names it is the preferred buffer + sharing mechanism. Sharing buffers through GEM names is only supported + for legacy userspace. Furthermore PRIME also allows cross-device + buffer sharing since it is based on dma-bufs. + GEM Objects Mapping @@ -852,7 +852,7 @@ char *date; Command Execution - Perhaps the most important GEM function for GPU devices is providing a + Perhaps the most important GEM function for GPU devices is providing a command execution interface to clients. Client programs construct command buffers containing references to previously allocated memory objects, and then submit them to GEM. At that point, GEM takes care to @@ -874,95 +874,101 @@ char *date; GEM Function Reference !Edrivers/gpu/drm/drm_gem.c - - - VMA Offset Manager + + + VMA Offset Manager !Pdrivers/gpu/drm/drm_vma_manager.c vma offset manager !Edrivers/gpu/drm/drm_vma_manager.c !Iinclude/drm/drm_vma_manager.h - - - PRIME Buffer Sharing - - PRIME is the cross device buffer sharing framework in drm, originally - created for the OPTIMUS range of multi-gpu platforms. To userspace - PRIME buffers are dma-buf based file descriptors. - - - Overview and Driver Interface - - Similar to GEM global names, PRIME file descriptors are - also used to share buffer objects across processes. They offer - additional security: as file descriptors must be explicitly sent over - UNIX domain sockets to be shared between applications, they can't be - guessed like the globally unique GEM names. - - - Drivers that support the PRIME - API must set the DRIVER_PRIME bit in the struct - drm_driver - driver_features field, and implement the - prime_handle_to_fd and - prime_fd_to_handle operations. - - - int (*prime_handle_to_fd)(struct drm_device *dev, - struct drm_file *file_priv, uint32_t handle, - uint32_t flags, int *prime_fd); + + + PRIME Buffer Sharing + + PRIME is the cross device buffer sharing framework in drm, originally + created for the OPTIMUS range of multi-gpu platforms. To userspace + PRIME buffers are dma-buf based file descriptors. + + + Overview and Driver Interface + + Similar to GEM global names, PRIME file descriptors are + also used to share buffer objects across processes. They offer + additional security: as file descriptors must be explicitly sent over + UNIX domain sockets to be shared between applications, they can't be + guessed like the globally unique GEM names. + + + Drivers that support the PRIME + API must set the DRIVER_PRIME bit in the struct + drm_driver + driver_features field, and implement the + prime_handle_to_fd and + prime_fd_to_handle operations. + + + int (*prime_handle_to_fd)(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags, int *prime_fd); int (*prime_fd_to_handle)(struct drm_device *dev, - struct drm_file *file_priv, int prime_fd, - uint32_t *handle); - Those two operations convert a handle to a PRIME file descriptor and - vice versa. Drivers must use the kernel dma-buf buffer sharing framework - to manage the PRIME file descriptors. Similar to the mode setting - API PRIME is agnostic to the underlying buffer object manager, as - long as handles are 32bit unsigned integers. - - - While non-GEM drivers must implement the operations themselves, GEM - drivers must use the drm_gem_prime_handle_to_fd - and drm_gem_prime_fd_to_handle helper functions. - Those helpers rely on the driver - gem_prime_export and - gem_prime_import operations to create a dma-buf - instance from a GEM object (dma-buf exporter role) and to create a GEM - object from a dma-buf instance (dma-buf importer role). - - - struct dma_buf * (*gem_prime_export)(struct drm_device *dev, - struct drm_gem_object *obj, - int flags); + struct drm_file *file_priv, int prime_fd, + uint32_t *handle); + Those two operations convert a handle to a PRIME file descriptor and + vice versa. Drivers must use the kernel dma-buf buffer sharing framework + to manage the PRIME file descriptors. Similar to the mode setting + API PRIME is agnostic to the underlying buffer object manager, as + long as handles are 32bit unsigned integers. + + + While non-GEM drivers must implement the operations themselves, GEM + drivers must use the drm_gem_prime_handle_to_fd + and drm_gem_prime_fd_to_handle helper functions. + Those helpers rely on the driver + gem_prime_export and + gem_prime_import operations to create a dma-buf + instance from a GEM object (dma-buf exporter role) and to create a GEM + object from a dma-buf instance (dma-buf importer role). + + + struct dma_buf * (*gem_prime_export)(struct drm_device *dev, + struct drm_gem_object *obj, + int flags); struct drm_gem_object * (*gem_prime_import)(struct drm_device *dev, - struct dma_buf *dma_buf); - These two operations are mandatory for GEM drivers that support - PRIME. - - - - PRIME Helper Functions -!Pdrivers/gpu/drm/drm_prime.c PRIME Helpers + struct dma_buf *dma_buf); + These two operations are mandatory for GEM drivers that support + PRIME. + - - - PRIME Function References + + PRIME Helper Functions +!Pdrivers/gpu/drm/drm_prime.c PRIME Helpers + + + + PRIME Function References !Edrivers/gpu/drm/drm_prime.c - - - DRM MM Range Allocator - - Overview + + + DRM MM Range Allocator + + Overview !Pdrivers/gpu/drm/drm_mm.c Overview - - - LRU Scan/Eviction Support + + + LRU Scan/Eviction Support !Pdrivers/gpu/drm/drm_mm.c lru scan roaster - + - - DRM MM Range Allocator Function References + + DRM MM Range Allocator Function References !Edrivers/gpu/drm/drm_mm.c !Iinclude/drm/drm_mm.h - + + + CMA Helper Functions Reference +!Pdrivers/gpu/drm/drm_gem_cma_helper.c cma helpers +!Edrivers/gpu/drm/drm_gem_cma_helper.c +!Iinclude/drm/drm_gem_cma_helper.h + @@ -994,6 +1000,10 @@ int max_width, max_height; Display Modes Function Reference !Iinclude/drm/drm_modes.h !Edrivers/gpu/drm/drm_modes.c + + + Atomic Mode Setting Function Reference +!Edrivers/gpu/drm/drm_atomic.c Frame Buffer Creation @@ -1825,6 +1835,10 @@ void intel_crt_init(struct drm_device *dev) KMS API Functions !Edrivers/gpu/drm/drm_crtc.c + + + KMS Data Structures +!Iinclude/drm/drm_crtc.h KMS Locking @@ -1933,10 +1947,16 @@ void intel_crt_init(struct drm_device *dev) and then retrieves a list of modes by calling the connector get_modes helper operation. + + If the helper operation returns no mode, and if the connector status + is connector_status_connected, standard VESA DMT modes up to + 1024x768 are automatically added to the modes list by a call to + drm_add_modes_noedid. + - The function filters out modes larger than + The function then filters out modes larger than max_width and max_height - if specified. It then calls the optional connector + if specified. It finally calls the optional connector mode_valid helper operation for each mode in the probed list to check whether the mode is valid for the connector. @@ -2076,11 +2096,19 @@ void intel_crt_init(struct drm_device *dev) int (*get_modes)(struct drm_connector *connector); Fill the connector's probed_modes list - by parsing EDID data with drm_add_edid_modes or - calling drm_mode_probed_add directly for every + by parsing EDID data with drm_add_edid_modes, + adding standard VESA DMT modes with drm_add_modes_noedid, + or calling drm_mode_probed_add directly for every supported mode and return the number of modes it has detected. This operation is mandatory. + + Note that the caller function will automatically add standard VESA + DMT modes up to 1024x768 if the get_modes + helper operation returns no mode and if the connector status is + connector_status_connected. There is no need to call + drm_add_edid_modes manually in that case. + When adding modes manually the driver creates each mode with a call to drm_mode_create and must fill the following fields. @@ -2278,7 +2306,7 @@ void intel_crt_init(struct drm_device *dev) drm_helper_probe_single_connector_modes. - When parsing EDID data, drm_add_edid_modes fill the + When parsing EDID data, drm_add_edid_modes fills the connector display_info width_mm and height_mm fields. When creating modes @@ -2315,9 +2343,27 @@ void intel_crt_init(struct drm_device *dev) + + Atomic Modeset Helper Functions Reference + + Overview +!Pdrivers/gpu/drm/drm_atomic_helper.c overview + + + Implementing Asynchronous Atomic Commit +!Pdrivers/gpu/drm/drm_atomic_helper.c implementing async commit + + + Atomic State Reset and Initialization +!Pdrivers/gpu/drm/drm_atomic_helper.c atomic state reset and initialization + +!Iinclude/drm/drm_atomic_helper.h +!Edrivers/gpu/drm/drm_atomic_helper.c + Modeset Helper Functions Reference !Edrivers/gpu/drm/drm_crtc_helper.c +!Pdrivers/gpu/drm/drm_crtc_helper.c overview Output Probing Helper Functions Reference @@ -2341,6 +2387,12 @@ void intel_crt_init(struct drm_device *dev) !Pdrivers/gpu/drm/drm_dp_mst_topology.c dp mst helper !Iinclude/drm/drm_dp_mst_helper.h !Edrivers/gpu/drm/drm_dp_mst_topology.c + + + MIPI DSI Helper Functions Reference +!Pdrivers/gpu/drm/drm_mipi_dsi.c dsi helpers +!Iinclude/drm/drm_mipi_dsi.h +!Edrivers/gpu/drm/drm_mipi_dsi.c EDID Helper Functions Reference @@ -2371,7 +2423,12 @@ void intel_crt_init(struct drm_device *dev) Plane Helper Reference -!Edrivers/gpu/drm/drm_plane_helper.c Plane Helpers +!Edrivers/gpu/drm/drm_plane_helper.c +!Pdrivers/gpu/drm/drm_plane_helper.c overview + + + Tile group +!Pdrivers/gpu/drm/drm_crtc.c Tile group @@ -2507,8 +2564,8 @@ void intel_crt_init(struct drm_device *dev) Description/Restrictions - DRM - Generic + DRM + Generic “EDID” BLOB | IMMUTABLE 0 @@ -2523,6 +2580,20 @@ void intel_crt_init(struct drm_device *dev) Contains DPMS operation mode value. + “PATH” + BLOB | IMMUTABLE + 0 + Connector + Contains topology path to a connector. + + + “TILE” + BLOB | IMMUTABLE + 0 + Connector + Contains tiling information for a connector. + + Plane “type” ENUM | IMMUTABLE @@ -2638,6 +2709,21 @@ void intel_crt_init(struct drm_device *dev) TBD + Virtual GPU + “suggested X” + RANGE + Min=0, Max=0xffffffff + Connector + property to suggest an X offset for a connector + + + “suggested Y” + RANGE + Min=0, Max=0xffffffff + Connector + property to suggest an Y offset for a connector + + Optional “scaling mode” ENUM @@ -3787,6 +3873,26 @@ int num_ioctls; blocks. This excludes a set of SoC platforms with an SGX rendering unit, those have basic support through the gma500 drm driver. + + Core Driver Infrastructure + + This section covers core driver infrastructure used by both the display + and the GEM parts of the driver. + + + Runtime Power Management +!Pdrivers/gpu/drm/i915/intel_runtime_pm.c runtime pm +!Idrivers/gpu/drm/i915/intel_runtime_pm.c + + + Interrupt Handling +!Pdrivers/gpu/drm/i915/i915_irq.c interrupt handling +!Fdrivers/gpu/drm/i915/i915_irq.c intel_irq_init intel_irq_init_hw intel_hpd_init +!Fdrivers/gpu/drm/i915/i915_irq.c intel_irq_fini +!Fdrivers/gpu/drm/i915/i915_irq.c intel_runtime_pm_disable_interrupts +!Fdrivers/gpu/drm/i915/i915_irq.c intel_runtime_pm_enable_interrupts + + Display Hardware Handling @@ -3803,6 +3909,18 @@ int num_ioctls; configuration change. + + Frontbuffer Tracking +!Pdrivers/gpu/drm/i915/intel_frontbuffer.c frontbuffer tracking +!Idrivers/gpu/drm/i915/intel_frontbuffer.c +!Fdrivers/gpu/drm/i915/intel_drv.h intel_frontbuffer_flip +!Fdrivers/gpu/drm/i915/i915_gem.c i915_gem_track_fb + + + Display FIFO Underrun Reporting +!Pdrivers/gpu/drm/i915/intel_fifo_underrun.c fifo underrun handling +!Idrivers/gpu/drm/i915/intel_fifo_underrun.c + Plane Configuration @@ -3822,6 +3940,16 @@ int num_ioctls; probing, so those sections fully apply. + + High Definition Audio +!Pdrivers/gpu/drm/i915/intel_audio.c High Definition Audio over HDMI and Display Port +!Idrivers/gpu/drm/i915/intel_audio.c + + + Panel Self Refresh PSR (PSR/SRD) +!Pdrivers/gpu/drm/i915/intel_psr.c Panel Self Refresh (PSR/SRD) +!Idrivers/gpu/drm/i915/intel_psr.c + DPIO !Pdrivers/gpu/drm/i915/i915_reg.h DPIO @@ -3931,6 +4059,28 @@ int num_ioctls; !Idrivers/gpu/drm/i915/intel_lrc.c + + + Tracing + + This sections covers all things related to the tracepoints implemented in + the i915 driver. + + + i915_ppgtt_create and i915_ppgtt_release +!Pdrivers/gpu/drm/i915/i915_trace.h i915_ppgtt_create and i915_ppgtt_release tracepoints + + + i915_context_create and i915_context_free +!Pdrivers/gpu/drm/i915/i915_trace.h i915_context_create and i915_context_free tracepoints + + + switch_mm +!Pdrivers/gpu/drm/i915/i915_trace.h switch_mm tracepoint + + + +!Cdrivers/gpu/drm/i915/i915_irq.c diff --git a/Documentation/devicetree/bindings/staging/imx-drm/fsl-imx-drm.txt b/Documentation/devicetree/bindings/drm/imx/fsl-imx-drm.txt similarity index 100% rename from Documentation/devicetree/bindings/staging/imx-drm/fsl-imx-drm.txt rename to Documentation/devicetree/bindings/drm/imx/fsl-imx-drm.txt diff --git a/Documentation/devicetree/bindings/staging/imx-drm/hdmi.txt b/Documentation/devicetree/bindings/drm/imx/hdmi.txt similarity index 100% rename from Documentation/devicetree/bindings/staging/imx-drm/hdmi.txt rename to Documentation/devicetree/bindings/drm/imx/hdmi.txt diff --git a/Documentation/devicetree/bindings/staging/imx-drm/ldb.txt b/Documentation/devicetree/bindings/drm/imx/ldb.txt similarity index 100% rename from Documentation/devicetree/bindings/staging/imx-drm/ldb.txt rename to Documentation/devicetree/bindings/drm/imx/ldb.txt diff --git a/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt index b48f4ef31d937ff8c4944e379d55357045653f02..4c32ef0b7db8fd635285b63722771a1a5ee9f848 100644 --- a/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt +++ b/Documentation/devicetree/bindings/gpu/nvidia,tegra20-host1x.txt @@ -191,6 +191,8 @@ of the following host1x client modules: - nvidia,hpd-gpio: specifies a GPIO used for hotplug detection - nvidia,edid: supplies a binary EDID blob - nvidia,panel: phandle of a display panel + - nvidia,ganged-mode: contains a phandle to a second DSI controller to gang + up with in order to support up to 8 data lanes - sor: serial output resource diff --git a/Documentation/devicetree/bindings/gpu/st,stih4xx.txt b/Documentation/devicetree/bindings/gpu/st,stih4xx.txt index 2d150c311a0539e0caa9ab302bf9943c18eb0fc1..c99eb34e640b6782c066758f20922b21be6f75f0 100644 --- a/Documentation/devicetree/bindings/gpu/st,stih4xx.txt +++ b/Documentation/devicetree/bindings/gpu/st,stih4xx.txt @@ -68,7 +68,7 @@ STMicroelectronics stih4xx platforms number of clocks may depend of the SoC type. - clock-names: names of the clocks listed in clocks property in the same order. - - hdmi,hpd-gpio: gpio id to detect if an hdmi cable is plugged or not. + - ddc: phandle of an I2C controller used for DDC EDID probing sti-hda: Required properties: @@ -83,6 +83,22 @@ sti-hda: - clock-names: names of the clocks listed in clocks property in the same order. +sti-hqvdp: + must be a child of sti-display-subsystem + Required properties: + - compatible: "st,stih-hqvdp" + - reg: Physical base address of the IP registers and length of memory mapped region. + - clocks: from common clock binding: handle hardware IP needed clocks, the + number of clocks may depend of the SoC type. + See ../clocks/clock-bindings.txt for details. + - clock-names: names of the clocks listed in clocks property in the same + order. + - resets: resets to be used by the device + See ../reset/reset.txt for details. + - reset-names: names of the resets listed in resets property in the same + order. + - st,vtg: phandle on vtg main device node. + Example: / { @@ -173,7 +189,6 @@ Example: interrupt-names = "irq"; clock-names = "pix", "tmds", "phy", "audio"; clocks = <&clockgen_c_vcc CLK_S_PIX_HDMI>, <&clockgen_c_vcc CLK_S_TMDS_HDMI>, <&clockgen_c_vcc CLK_S_HDMI_REJECT_PLL>, <&clockgen_b1 CLK_S_PCM_0>; - hdmi,hpd-gpio = <&PIO2 5>; }; sti-hda@fe85a000 { @@ -184,6 +199,16 @@ Example: clocks = <&clockgen_c_vcc CLK_S_PIX_HD>, <&clockgen_c_vcc CLK_S_HDDAC>; }; }; + + sti-hqvdp@9c000000 { + compatible = "st,stih407-hqvdp"; + reg = <0x9C00000 0x100000>; + clock-names = "hqvdp", "pix_main"; + clocks = <&clk_s_c0_flexgen CLK_MAIN_DISP>, <&clk_s_d2_flexgen CLK_PIX_MAIN_DISP>; + reset-names = "hqvdp"; + resets = <&softreset STIH407_HDQVDP_SOFTRESET>; + st,vtg = <&vtg_main>; + }; }; ... }; diff --git a/Documentation/devicetree/bindings/panel/auo,b116xw03.txt b/Documentation/devicetree/bindings/panel/auo,b116xw03.txt new file mode 100644 index 0000000000000000000000000000000000000000..690d0a568ef392e162505d7e78aebcb96f061f0b --- /dev/null +++ b/Documentation/devicetree/bindings/panel/auo,b116xw03.txt @@ -0,0 +1,7 @@ +AU Optronics Corporation 11.6" HD (1366x768) color TFT-LCD panel + +Required properties: +- compatible: should be "auo,b116xw03" + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/panel/hannstar,hsd070pww1.txt b/Documentation/devicetree/bindings/panel/hannstar,hsd070pww1.txt new file mode 100644 index 0000000000000000000000000000000000000000..7da1d5c038ffb46de435d93081b860d601f55b76 --- /dev/null +++ b/Documentation/devicetree/bindings/panel/hannstar,hsd070pww1.txt @@ -0,0 +1,7 @@ +HannStar Display Corp. HSD070PWW1 7.0" WXGA TFT LCD panel + +Required properties: +- compatible: should be "hannstar,hsd070pww1" + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/panel/hit,tx23d38vm0caa.txt b/Documentation/devicetree/bindings/panel/hit,tx23d38vm0caa.txt new file mode 100644 index 0000000000000000000000000000000000000000..04caaae19af6f943bd6c5cf9689ce02793caade2 --- /dev/null +++ b/Documentation/devicetree/bindings/panel/hit,tx23d38vm0caa.txt @@ -0,0 +1,7 @@ +Hitachi Ltd. Corporation 9" WVGA (800x480) TFT LCD panel + +Required properties: +- compatible: should be "hit,tx23d38vm0caa" + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/panel/innolux,g121i1-l01.txt b/Documentation/devicetree/bindings/panel/innolux,g121i1-l01.txt new file mode 100644 index 0000000000000000000000000000000000000000..2743b07cd2f22e2073720ecc4c0c2cdb79a868b3 --- /dev/null +++ b/Documentation/devicetree/bindings/panel/innolux,g121i1-l01.txt @@ -0,0 +1,7 @@ +Innolux Corporation 12.1" WXGA (1280x800) TFT LCD panel + +Required properties: +- compatible: should be "innolux,g121i1-l01" + +This binding is compatible with the simple-panel binding, which is specified +in simple-panel.txt in this directory. diff --git a/Documentation/devicetree/bindings/panel/sharp,lq101r1sx01.txt b/Documentation/devicetree/bindings/panel/sharp,lq101r1sx01.txt new file mode 100644 index 0000000000000000000000000000000000000000..f522bb8e47e1c7dc39dd7e8e21d6387bbc492df2 --- /dev/null +++ b/Documentation/devicetree/bindings/panel/sharp,lq101r1sx01.txt @@ -0,0 +1,49 @@ +Sharp Microelectronics 10.1" WQXGA TFT LCD panel + +This panel requires a dual-channel DSI host to operate. It supports two modes: +- left-right: each channel drives the left or right half of the screen +- even-odd: each channel drives the even or odd lines of the screen + +Each of the DSI channels controls a separate DSI peripheral. The peripheral +driven by the first link (DSI-LINK1), left or even, is considered the primary +peripheral and controls the device. The 'link2' property contains a phandle +to the peripheral driven by the second link (DSI-LINK2, right or odd). + +Note that in video mode the DSI-LINK1 interface always provides the left/even +pixels and DSI-LINK2 always provides the right/odd pixels. In command mode it +is possible to program either link to drive the left/even or right/odd pixels +but for the sake of consistency this binding assumes that the same assignment +is chosen as for video mode. + +Required properties: +- compatible: should be "sharp,lq101r1sx01" +- reg: DSI virtual channel of the peripheral + +Required properties (for DSI-LINK1 only): +- link2: phandle to the DSI peripheral on the secondary link. Note that the + presence of this property marks the containing node as DSI-LINK1. +- power-supply: phandle of the regulator that provides the supply voltage + +Optional properties (for DSI-LINK1 only): +- backlight: phandle of the backlight device attached to the panel + +Example: + + dsi@54300000 { + panel: panel@0 { + compatible = "sharp,lq101r1sx01"; + reg = <0>; + + link2 = <&secondary>; + + power-supply = <...>; + backlight = <...>; + }; + }; + + dsi@54400000 { + secondary: panel@0 { + compatible = "sharp,lq101r1sx01"; + reg = <0>; + }; + }; diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt index cc6151c431c86ba1510a5404c3135ce8a03e301e..423d47418e72e984558914990dcaac3c26a5985f 100644 --- a/Documentation/devicetree/bindings/vendor-prefixes.txt +++ b/Documentation/devicetree/bindings/vendor-prefixes.txt @@ -66,8 +66,10 @@ gmt Global Mixed-mode Technology, Inc. google Google, Inc. gumstix Gumstix, Inc. gw Gateworks Corporation +hannstar HannStar Display Corporation haoyu Haoyu Microelectronic Co. Ltd. hisilicon Hisilicon Limited. +hit Hitachi Ltd. honeywell Honeywell hp Hewlett Packard i2se I2SE GmbH diff --git a/Documentation/devicetree/bindings/video/adi,adv7511.txt b/Documentation/devicetree/bindings/video/adi,adv7511.txt new file mode 100644 index 0000000000000000000000000000000000000000..96c25ee01501fdad9b5d402e78b30de0d54f3cf0 --- /dev/null +++ b/Documentation/devicetree/bindings/video/adi,adv7511.txt @@ -0,0 +1,88 @@ +Analog Device ADV7511(W)/13 HDMI Encoders +----------------------------------------- + +The ADV7511, ADV7511W and ADV7513 are HDMI audio and video transmitters +compatible with HDMI 1.4 and DVI 1.0. They support color space conversion, +S/PDIF, CEC and HDCP. + +Required properties: + +- compatible: Should be one of "adi,adv7511", "adi,adv7511w" or "adi,adv7513" +- reg: I2C slave address + +The ADV7511 supports a large number of input data formats that differ by their +color depth, color format, clock mode, bit justification and random +arrangement of components on the data bus. The combination of the following +properties describe the input and map directly to the video input tables of the +ADV7511 datasheet that document all the supported combinations. + +- adi,input-depth: Number of bits per color component at the input (8, 10 or + 12). +- adi,input-colorspace: The input color space, one of "rgb", "yuv422" or + "yuv444". +- adi,input-clock: The input clock type, one of "1x" (one clock cycle per + pixel), "2x" (two clock cycles per pixel), "ddr" (one clock cycle per pixel, + data driven on both edges). + +The following input format properties are required except in "rgb 1x" and +"yuv444 1x" modes, in which case they must not be specified. + +- adi,input-style: The input components arrangement variant (1, 2 or 3), as + listed in the input format tables in the datasheet. +- adi,input-justification: The input bit justification ("left", "evenly", + "right"). + +Optional properties: + +- interrupts: Specifier for the ADV7511 interrupt +- pd-gpios: Specifier for the GPIO connected to the power down signal + +- adi,clock-delay: Video data clock delay relative to the pixel clock, in ps + (-1200 ps .. 1600 ps). Defaults to no delay. +- adi,embedded-sync: The input uses synchronization signals embedded in the + data stream (similar to BT.656). Defaults to separate H/V synchronization + signals. + +Required nodes: + +The ADV7511 has two video ports. Their connections are modelled using the OF +graph bindings specified in Documentation/devicetree/bindings/graph.txt. + +- Video port 0 for the RGB or YUV input +- Video port 1 for the HDMI output + + +Example +------- + + adv7511w: hdmi@39 { + compatible = "adi,adv7511w"; + reg = <39>; + interrupt-parent = <&gpio3>; + interrupts = <29 IRQ_TYPE_EDGE_FALLING>; + + adi,input-depth = <8>; + adi,input-colorspace = "rgb"; + adi,input-clock = "1x"; + adi,input-style = <1>; + adi,input-justification = "evenly"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + adv7511w_in: endpoint { + remote-endpoint = <&dpi_out>; + }; + }; + + port@1 { + reg = <1>; + adv7511_out: endpoint { + remote-endpoint = <&hdmi_connector_in>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/video/exynos_dsim.txt b/Documentation/devicetree/bindings/video/exynos_dsim.txt index e74243b4b317ff74728c50b09e66b5f9347f6d69..ca2b4aacd9afb5e81d508df0371b4eb5b7f4f9b0 100644 --- a/Documentation/devicetree/bindings/video/exynos_dsim.txt +++ b/Documentation/devicetree/bindings/video/exynos_dsim.txt @@ -4,6 +4,7 @@ Required properties: - compatible: value should be one of the following "samsung,exynos3250-mipi-dsi" /* for Exynos3250/3472 SoCs */ "samsung,exynos4210-mipi-dsi" /* for Exynos4 SoCs */ + "samsung,exynos4415-mipi-dsi" /* for Exynos4415 SoC */ "samsung,exynos5410-mipi-dsi" /* for Exynos5410/5420/5440 SoCs */ - reg: physical base address and length of the registers set for the device - interrupts: should contain DSI interrupt diff --git a/Documentation/devicetree/bindings/video/rockchip-drm.txt b/Documentation/devicetree/bindings/video/rockchip-drm.txt new file mode 100644 index 0000000000000000000000000000000000000000..7fff582495a228fae7e7c21445320a9d7cf5fb4f --- /dev/null +++ b/Documentation/devicetree/bindings/video/rockchip-drm.txt @@ -0,0 +1,19 @@ +Rockchip DRM master device +================================ + +The Rockchip DRM master device is a virtual device needed to list all +vop devices or other display interface nodes that comprise the +graphics subsystem. + +Required properties: +- compatible: Should be "rockchip,display-subsystem" +- ports: Should contain a list of phandles pointing to display interface port + of vop devices. vop definitions as defined in + Documentation/devicetree/bindings/video/rockchip-vop.txt + +example: + +display-subsystem { + compatible = "rockchip,display-subsystem"; + ports = <&vopl_out>, <&vopb_out>; +}; diff --git a/Documentation/devicetree/bindings/video/rockchip-vop.txt b/Documentation/devicetree/bindings/video/rockchip-vop.txt new file mode 100644 index 0000000000000000000000000000000000000000..d15351f2313d662195ca1732e83de94e8760d015 --- /dev/null +++ b/Documentation/devicetree/bindings/video/rockchip-vop.txt @@ -0,0 +1,58 @@ +device-tree bindings for rockchip soc display controller (vop) + +VOP (Visual Output Processor) is the Display Controller for the Rockchip +series of SoCs which transfers the image data from a video memory +buffer to an external LCD interface. + +Required properties: +- compatible: value should be one of the following + "rockchip,rk3288-vop"; + +- interrupts: should contain a list of all VOP IP block interrupts in the + order: VSYNC, LCD_SYSTEM. The interrupt specifier + format depends on the interrupt controller used. + +- clocks: must include clock specifiers corresponding to entries in the + clock-names property. + +- clock-names: Must contain + aclk_vop: for ddr buffer transfer. + hclk_vop: for ahb bus to R/W the phy regs. + dclk_vop: pixel clock. + +- resets: Must contain an entry for each entry in reset-names. + See ../reset/reset.txt for details. +- reset-names: Must include the following entries: + - axi + - ahb + - dclk + +- iommus: required a iommu node + +- port: A port node with endpoint definitions as defined in + Documentation/devicetree/bindings/media/video-interfaces.txt. + +Example: +SoC specific DT entry: + vopb: vopb@ff930000 { + compatible = "rockchip,rk3288-vop"; + reg = <0xff930000 0x19c>; + interrupts = ; + clocks = <&cru ACLK_VOP0>, <&cru DCLK_VOP0>, <&cru HCLK_VOP0>; + clock-names = "aclk_vop", "dclk_vop", "hclk_vop"; + resets = <&cru SRST_LCDC1_AXI>, <&cru SRST_LCDC1_AHB>, <&cru SRST_LCDC1_DCLK>; + reset-names = "axi", "ahb", "dclk"; + iommus = <&vopb_mmu>; + vopb_out: port { + #address-cells = <1>; + #size-cells = <0>; + vopb_out_edp: endpoint@0 { + reg = <0>; + remote-endpoint=<&edp_in_vopb>; + }; + vopb_out_hdmi: endpoint@1 { + reg = <1>; + remote-endpoint=<&hdmi_in_vopb>; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/video/samsung-fimd.txt b/Documentation/devicetree/bindings/video/samsung-fimd.txt index 4e6c77c85546790bbb8b85c6528e31287ac9e81a..cf1af637102169a0c2b8059e4440132fce0d6bb2 100644 --- a/Documentation/devicetree/bindings/video/samsung-fimd.txt +++ b/Documentation/devicetree/bindings/video/samsung-fimd.txt @@ -11,6 +11,7 @@ Required properties: "samsung,s5pv210-fimd"; /* for S5PV210 SoC */ "samsung,exynos3250-fimd"; /* for Exynos3250/3472 SoCs */ "samsung,exynos4210-fimd"; /* for Exynos4 SoCs */ + "samsung,exynos4415-fimd"; /* for Exynos4415 SoC */ "samsung,exynos5250-fimd"; /* for Exynos5 SoCs */ - reg: physical base address and length of the FIMD registers set. diff --git a/MAINTAINERS b/MAINTAINERS index fdffe962a16af4aba924bac3ea230149bac5825e..c690b5a0d7b7ffbcc3271795444b8ecbb48f89ca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -618,6 +618,16 @@ S: Maintained F: drivers/iommu/amd_iommu*.[ch] F: include/linux/amd-iommu.h +AMD KFD +M: Oded Gabbay +L: dri-devel@lists.freedesktop.org +T: git git://people.freedesktop.org/~gabbayo/linux.git +S: Supported +F: drivers/gpu/drm/amd/amdkfd/ +F: drivers/gpu/drm/radeon/radeon_kfd.c +F: drivers/gpu/drm/radeon/radeon_kfd.h +F: include/uapi/linux/kfd_ioctl.h + AMD MICROCODE UPDATE SUPPORT M: Andreas Herrmann L: amd64-microcode@amd64.org @@ -3297,6 +3307,13 @@ F: drivers/gpu/drm/exynos/ F: include/drm/exynos* F: include/uapi/drm/exynos* +DRM DRIVERS FOR FREESCALE IMX +M: Philipp Zabel +L: dri-devel@lists.freedesktop.org +S: Maintained +F: drivers/gpu/drm/imx/ +F: Documentation/devicetree/bindings/drm/imx/ + DRM DRIVERS FOR NVIDIA TEGRA M: Thierry Reding M: Terje Bergström diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index b47262afb2408c45d5ab10ca3ffc512bc2b14af2..f8197eb6e5669ada1b8ddd57e7bb09e42bedfe62 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -83,61 +82,6 @@ * */ -/* DU */ -static struct rcar_du_encoder_data lager_du_encoders[] = { - { - .type = RCAR_DU_ENCODER_VGA, - .output = RCAR_DU_OUTPUT_DPAD0, - }, { - .type = RCAR_DU_ENCODER_NONE, - .output = RCAR_DU_OUTPUT_LVDS1, - .connector.lvds.panel = { - .width_mm = 210, - .height_mm = 158, - .mode = { - .pixelclock = 65000000, - .hactive = 1024, - .hfront_porch = 20, - .hback_porch = 160, - .hsync_len = 136, - .vactive = 768, - .vfront_porch = 3, - .vback_porch = 29, - .vsync_len = 6, - }, - }, - }, -}; - -static const struct rcar_du_platform_data lager_du_pdata __initconst = { - .encoders = lager_du_encoders, - .num_encoders = ARRAY_SIZE(lager_du_encoders), -}; - -static const struct resource du_resources[] __initconst = { - DEFINE_RES_MEM(0xfeb00000, 0x70000), - DEFINE_RES_MEM_NAMED(0xfeb90000, 0x1c, "lvds.0"), - DEFINE_RES_MEM_NAMED(0xfeb94000, 0x1c, "lvds.1"), - DEFINE_RES_IRQ(gic_spi(256)), - DEFINE_RES_IRQ(gic_spi(268)), - DEFINE_RES_IRQ(gic_spi(269)), -}; - -static void __init lager_add_du_device(void) -{ - struct platform_device_info info = { - .name = "rcar-du-r8a7790", - .id = -1, - .res = du_resources, - .num_res = ARRAY_SIZE(du_resources), - .data = &lager_du_pdata, - .size_data = sizeof(lager_du_pdata), - .dma_mask = DMA_BIT_MASK(32), - }; - - platform_device_register_full(&info); -} - /* LEDS */ static struct gpio_led lager_leds[] = { { @@ -800,8 +744,6 @@ static void __init lager_add_standard_devices(void) platform_device_register_full(ðer_info); - lager_add_du_device(); - platform_device_register_resndata(NULL, "qspi", 0, qspi_resources, ARRAY_SIZE(qspi_resources), diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index 994dc7d86ae241fff8ebba14948cefd1f4fff313..598f704f76ae7420c96f420c9d3c9b63bbf09edc 100644 --- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -171,62 +170,6 @@ static struct platform_device hspi_device = { .num_resources = ARRAY_SIZE(hspi_resources), }; -/* - * DU - * - * The panel only specifies the [hv]display and [hv]total values. The position - * and width of the sync pulses don't matter, they're copied from VESA timings. - */ -static struct rcar_du_encoder_data du_encoders[] = { - { - .type = RCAR_DU_ENCODER_VGA, - .output = RCAR_DU_OUTPUT_DPAD0, - }, { - .type = RCAR_DU_ENCODER_LVDS, - .output = RCAR_DU_OUTPUT_DPAD1, - .connector.lvds.panel = { - .width_mm = 210, - .height_mm = 158, - .mode = { - .pixelclock = 65000000, - .hactive = 1024, - .hfront_porch = 20, - .hback_porch = 160, - .hsync_len = 136, - .vactive = 768, - .vfront_porch = 3, - .vback_porch = 29, - .vsync_len = 6, - }, - }, - }, -}; - -static const struct rcar_du_platform_data du_pdata __initconst = { - .encoders = du_encoders, - .num_encoders = ARRAY_SIZE(du_encoders), -}; - -static const struct resource du_resources[] __initconst = { - DEFINE_RES_MEM(0xfff80000, 0x40000), - DEFINE_RES_IRQ(gic_iid(0x3f)), -}; - -static void __init marzen_add_du_device(void) -{ - struct platform_device_info info = { - .name = "rcar-du-r8a7779", - .id = -1, - .res = du_resources, - .num_res = ARRAY_SIZE(du_resources), - .data = &du_pdata, - .size_data = sizeof(du_pdata), - .dma_mask = DMA_BIT_MASK(32), - }; - - platform_device_register_full(&info); -} - /* LEDS */ static struct gpio_led marzen_leds[] = { { @@ -385,7 +328,6 @@ static void __init marzen_init(void) platform_device_register_full(&vin1_info); platform_device_register_full(&vin3_info); platform_add_devices(marzen_devices, ARRAY_SIZE(marzen_devices)); - marzen_add_du_device(); } static const char *marzen_boards_compat_dt[] __initdata = { diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 2e1a6853e00cec950bdd6e0f11586a57c062a05d..fe9f0b79a18b321bbc80711b7e71dde49b7436e5 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -455,6 +455,23 @@ struct intel_stolen_funcs { u32 (*base)(int num, int slot, int func, size_t size); }; +static size_t __init gen9_stolen_size(int num, int slot, int func) +{ + u16 gmch_ctrl; + + gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); + gmch_ctrl >>= BDW_GMCH_GMS_SHIFT; + gmch_ctrl &= BDW_GMCH_GMS_MASK; + + if (gmch_ctrl < 0xf0) + return gmch_ctrl << 25; /* 32 MB units */ + else + /* 4MB increments starting at 0xf0 for 4MB */ + return (gmch_ctrl - 0xf0 + 1) << 22; +} + +typedef size_t (*stolen_size_fn)(int num, int slot, int func); + static const struct intel_stolen_funcs i830_stolen_funcs __initconst = { .base = i830_stolen_base, .size = i830_stolen_size, @@ -490,6 +507,11 @@ static const struct intel_stolen_funcs gen8_stolen_funcs __initconst = { .size = gen8_stolen_size, }; +static const struct intel_stolen_funcs gen9_stolen_funcs __initconst = { + .base = intel_stolen_base, + .size = gen9_stolen_size, +}; + static const struct intel_stolen_funcs chv_stolen_funcs __initconst = { .base = intel_stolen_base, .size = chv_stolen_size, @@ -523,6 +545,7 @@ static const struct pci_device_id intel_stolen_ids[] __initconst = { INTEL_BDW_M_IDS(&gen8_stolen_funcs), INTEL_BDW_D_IDS(&gen8_stolen_funcs), INTEL_CHV_IDS(&chv_stolen_funcs), + INTEL_SKL_IDS(&gen9_stolen_funcs), }; static void __init intel_graphics_stolen(int num, int slot, int func) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 9a024f899dd40a040ccede1571e1e2919e61bea9..f3334829e55a3e6ac393a348c6da0e7aef159709 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -153,7 +153,6 @@ static struct page *i8xx_alloc_pages(void) __free_pages(page, 2); return NULL; } - get_page(page); atomic_inc(&agp_bridge->current_memory_agp); return page; } @@ -164,7 +163,6 @@ static void i8xx_destroy_pages(struct page *page) return; set_pages_wb(page, 4); - put_page(page); __free_pages(page, 2); atomic_dec(&agp_bridge->current_memory_agp); } @@ -300,7 +298,6 @@ static int intel_gtt_setup_scratch_page(void) page = alloc_page(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); if (page == NULL) return -ENOMEM; - get_page(page); set_pages_uc(page, 1); if (intel_private.needs_dmar) { @@ -560,7 +557,6 @@ static void intel_gtt_teardown_scratch_page(void) set_pages_wb(intel_private.scratch_page, 1); pci_unmap_page(intel_private.pcidev, intel_private.scratch_page_dma, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - put_page(intel_private.scratch_page); __free_page(intel_private.scratch_page); } diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index e3b4b0f02b3d1de3e07a9d40cb11b22d1822701f..c3413b6adb17caec6ac0f273bfa80ef8bacca960 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -167,6 +167,8 @@ config DRM_SAVAGE source "drivers/gpu/drm/exynos/Kconfig" +source "drivers/gpu/drm/rockchip/Kconfig" + source "drivers/gpu/drm/vmwgfx/Kconfig" source "drivers/gpu/drm/gma500/Kconfig" @@ -200,3 +202,7 @@ source "drivers/gpu/drm/tegra/Kconfig" source "drivers/gpu/drm/panel/Kconfig" source "drivers/gpu/drm/sti/Kconfig" + +source "drivers/gpu/drm/amd/amdkfd/Kconfig" + +source "drivers/gpu/drm/imx/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 9292a761ea6db9fa2844f762726b717e1bbce4b2..66e40398b3d32220624cb7fad671c86058c84339 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -14,7 +14,7 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \ drm_info.o drm_debugfs.o drm_encoder_slave.o \ drm_trace_points.o drm_global.o drm_prime.o \ drm_rect.o drm_vma_manager.o drm_flip_work.o \ - drm_modeset_lock.o + drm_modeset_lock.o drm_atomic.o drm-$(CONFIG_COMPAT) += drm_ioc32.o drm-$(CONFIG_DRM_GEM_CMA_HELPER) += drm_gem_cma_helper.o @@ -23,7 +23,7 @@ drm-$(CONFIG_DRM_PANEL) += drm_panel.o drm-$(CONFIG_OF) += drm_of.o drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \ - drm_plane_helper.o drm_dp_mst_topology.o + drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o drm_kms_helper-$(CONFIG_DRM_KMS_FB_HELPER) += drm_fb_helper.o drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o @@ -49,6 +49,7 @@ obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/ obj-$(CONFIG_DRM_VIA) +=via/ obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/ obj-$(CONFIG_DRM_EXYNOS) +=exynos/ +obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip/ obj-$(CONFIG_DRM_GMA500) += gma500/ obj-$(CONFIG_DRM_UDL) += udl/ obj-$(CONFIG_DRM_AST) += ast/ @@ -62,6 +63,8 @@ obj-$(CONFIG_DRM_BOCHS) += bochs/ obj-$(CONFIG_DRM_MSM) += msm/ obj-$(CONFIG_DRM_TEGRA) += tegra/ obj-$(CONFIG_DRM_STI) += sti/ +obj-$(CONFIG_DRM_IMX) += imx/ obj-y += i2c/ obj-y += panel/ obj-y += bridge/ +obj-$(CONFIG_HSA_AMD) += amd/amdkfd/ diff --git a/drivers/gpu/drm/README.drm b/drivers/gpu/drm/README.drm deleted file mode 100644 index b5b3327225819fbf51df2daceca6d75fbd0dcf1b..0000000000000000000000000000000000000000 --- a/drivers/gpu/drm/README.drm +++ /dev/null @@ -1,43 +0,0 @@ -************************************************************ -* For the very latest on DRI development, please see: * -* http://dri.freedesktop.org/ * -************************************************************ - -The Direct Rendering Manager (drm) is a device-independent kernel-level -device driver that provides support for the XFree86 Direct Rendering -Infrastructure (DRI). - -The DRM supports the Direct Rendering Infrastructure (DRI) in four major -ways: - - 1. The DRM provides synchronized access to the graphics hardware via - the use of an optimized two-tiered lock. - - 2. The DRM enforces the DRI security policy for access to the graphics - hardware by only allowing authenticated X11 clients access to - restricted regions of memory. - - 3. The DRM provides a generic DMA engine, complete with multiple - queues and the ability to detect the need for an OpenGL context - switch. - - 4. The DRM is extensible via the use of small device-specific modules - that rely extensively on the API exported by the DRM module. - - -Documentation on the DRI is available from: - http://dri.freedesktop.org/wiki/Documentation - http://sourceforge.net/project/showfiles.php?group_id=387 - http://dri.sourceforge.net/doc/ - -For specific information about kernel-level support, see: - - The Direct Rendering Manager, Kernel Support for the Direct Rendering - Infrastructure - http://dri.sourceforge.net/doc/drm_low_level.html - - Hardware Locking for the Direct Rendering Infrastructure - http://dri.sourceforge.net/doc/hardware_locking_low_level.html - - A Security Analysis of the Direct Rendering Infrastructure - http://dri.sourceforge.net/doc/security_low_level.html diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..8dfac37ff32723bdf43b0d397ed64bfc0a45f08d --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -0,0 +1,9 @@ +# +# Heterogenous system architecture configuration +# + +config HSA_AMD + tristate "HSA kernel driver for AMD GPU devices" + depends on DRM_RADEON && AMD_IOMMU_V2 && X86_64 + help + Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..be6246de5091d0a589c25de25d147d8ed786af39 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -0,0 +1,14 @@ +# +# Makefile for Heterogenous System Architecture support for AMD GPU devices +# + +ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ + +amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ + kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ + kfd_process.o kfd_queue.o kfd_mqd_manager.o \ + kfd_kernel_queue.o kfd_packet_manager.o \ + kfd_process_queue_manager.o kfd_device_queue_manager.o \ + kfd_interrupt.o + +obj-$(CONFIG_HSA_AMD) += amdkfd.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h new file mode 100644 index 0000000000000000000000000000000000000000..607fc5ceadbeffcb0e5682c4bdb4a2b7d6a2d078 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h @@ -0,0 +1,221 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef CIK_REGS_H +#define CIK_REGS_H + +#define IH_VMID_0_LUT 0x3D40u + +#define BIF_DOORBELL_CNTL 0x530Cu + +#define SRBM_GFX_CNTL 0xE44 +#define PIPEID(x) ((x) << 0) +#define MEID(x) ((x) << 2) +#define VMID(x) ((x) << 4) +#define QUEUEID(x) ((x) << 8) + +#define SQ_CONFIG 0x8C00 + +#define SH_MEM_BASES 0x8C28 +/* if PTR32, these are the bases for scratch and lds */ +#define PRIVATE_BASE(x) ((x) << 0) /* scratch */ +#define SHARED_BASE(x) ((x) << 16) /* LDS */ +#define SH_MEM_APE1_BASE 0x8C2C +/* if PTR32, this is the base location of GPUVM */ +#define SH_MEM_APE1_LIMIT 0x8C30 +/* if PTR32, this is the upper limit of GPUVM */ +#define SH_MEM_CONFIG 0x8C34 +#define PTR32 (1 << 0) +#define PRIVATE_ATC (1 << 1) +#define ALIGNMENT_MODE(x) ((x) << 2) +#define SH_MEM_ALIGNMENT_MODE_DWORD 0 +#define SH_MEM_ALIGNMENT_MODE_DWORD_STRICT 1 +#define SH_MEM_ALIGNMENT_MODE_STRICT 2 +#define SH_MEM_ALIGNMENT_MODE_UNALIGNED 3 +#define DEFAULT_MTYPE(x) ((x) << 4) +#define APE1_MTYPE(x) ((x) << 7) + +/* valid for both DEFAULT_MTYPE and APE1_MTYPE */ +#define MTYPE_CACHED 0 +#define MTYPE_NONCACHED 3 + + +#define SH_STATIC_MEM_CONFIG 0x9604u + +#define TC_CFG_L1_LOAD_POLICY0 0xAC68 +#define TC_CFG_L1_LOAD_POLICY1 0xAC6C +#define TC_CFG_L1_STORE_POLICY 0xAC70 +#define TC_CFG_L2_LOAD_POLICY0 0xAC74 +#define TC_CFG_L2_LOAD_POLICY1 0xAC78 +#define TC_CFG_L2_STORE_POLICY0 0xAC7C +#define TC_CFG_L2_STORE_POLICY1 0xAC80 +#define TC_CFG_L2_ATOMIC_POLICY 0xAC84 +#define TC_CFG_L1_VOLATILE 0xAC88 +#define TC_CFG_L2_VOLATILE 0xAC8C + +#define CP_PQ_WPTR_POLL_CNTL 0xC20C +#define WPTR_POLL_EN (1 << 31) + +#define CPC_INT_CNTL 0xC2D0 +#define CP_ME1_PIPE0_INT_CNTL 0xC214 +#define CP_ME1_PIPE1_INT_CNTL 0xC218 +#define CP_ME1_PIPE2_INT_CNTL 0xC21C +#define CP_ME1_PIPE3_INT_CNTL 0xC220 +#define CP_ME2_PIPE0_INT_CNTL 0xC224 +#define CP_ME2_PIPE1_INT_CNTL 0xC228 +#define CP_ME2_PIPE2_INT_CNTL 0xC22C +#define CP_ME2_PIPE3_INT_CNTL 0xC230 +#define DEQUEUE_REQUEST_INT_ENABLE (1 << 13) +#define WRM_POLL_TIMEOUT_INT_ENABLE (1 << 17) +#define PRIV_REG_INT_ENABLE (1 << 23) +#define TIME_STAMP_INT_ENABLE (1 << 26) +#define GENERIC2_INT_ENABLE (1 << 29) +#define GENERIC1_INT_ENABLE (1 << 30) +#define GENERIC0_INT_ENABLE (1 << 31) +#define CP_ME1_PIPE0_INT_STATUS 0xC214 +#define CP_ME1_PIPE1_INT_STATUS 0xC218 +#define CP_ME1_PIPE2_INT_STATUS 0xC21C +#define CP_ME1_PIPE3_INT_STATUS 0xC220 +#define CP_ME2_PIPE0_INT_STATUS 0xC224 +#define CP_ME2_PIPE1_INT_STATUS 0xC228 +#define CP_ME2_PIPE2_INT_STATUS 0xC22C +#define CP_ME2_PIPE3_INT_STATUS 0xC230 +#define DEQUEUE_REQUEST_INT_STATUS (1 << 13) +#define WRM_POLL_TIMEOUT_INT_STATUS (1 << 17) +#define PRIV_REG_INT_STATUS (1 << 23) +#define TIME_STAMP_INT_STATUS (1 << 26) +#define GENERIC2_INT_STATUS (1 << 29) +#define GENERIC1_INT_STATUS (1 << 30) +#define GENERIC0_INT_STATUS (1 << 31) + +#define CP_HPD_EOP_BASE_ADDR 0xC904 +#define CP_HPD_EOP_BASE_ADDR_HI 0xC908 +#define CP_HPD_EOP_VMID 0xC90C +#define CP_HPD_EOP_CONTROL 0xC910 +#define EOP_SIZE(x) ((x) << 0) +#define EOP_SIZE_MASK (0x3f << 0) +#define CP_MQD_BASE_ADDR 0xC914 +#define CP_MQD_BASE_ADDR_HI 0xC918 +#define CP_HQD_ACTIVE 0xC91C +#define CP_HQD_VMID 0xC920 + +#define CP_HQD_PERSISTENT_STATE 0xC924u +#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8) +#define PRELOAD_REQ (1 << 0) + +#define CP_HQD_PIPE_PRIORITY 0xC928u +#define CP_HQD_QUEUE_PRIORITY 0xC92Cu +#define CP_HQD_QUANTUM 0xC930u +#define QUANTUM_EN 1U +#define QUANTUM_SCALE_1MS (1U << 4) +#define QUANTUM_DURATION(x) ((x) << 8) + +#define CP_HQD_PQ_BASE 0xC934 +#define CP_HQD_PQ_BASE_HI 0xC938 +#define CP_HQD_PQ_RPTR 0xC93C +#define CP_HQD_PQ_RPTR_REPORT_ADDR 0xC940 +#define CP_HQD_PQ_RPTR_REPORT_ADDR_HI 0xC944 +#define CP_HQD_PQ_WPTR_POLL_ADDR 0xC948 +#define CP_HQD_PQ_WPTR_POLL_ADDR_HI 0xC94C +#define CP_HQD_PQ_DOORBELL_CONTROL 0xC950 +#define DOORBELL_OFFSET(x) ((x) << 2) +#define DOORBELL_OFFSET_MASK (0x1fffff << 2) +#define DOORBELL_SOURCE (1 << 28) +#define DOORBELL_SCHD_HIT (1 << 29) +#define DOORBELL_EN (1 << 30) +#define DOORBELL_HIT (1 << 31) +#define CP_HQD_PQ_WPTR 0xC954 +#define CP_HQD_PQ_CONTROL 0xC958 +#define QUEUE_SIZE(x) ((x) << 0) +#define QUEUE_SIZE_MASK (0x3f << 0) +#define RPTR_BLOCK_SIZE(x) ((x) << 8) +#define RPTR_BLOCK_SIZE_MASK (0x3f << 8) +#define MIN_AVAIL_SIZE(x) ((x) << 20) +#define PQ_ATC_EN (1 << 23) +#define PQ_VOLATILE (1 << 26) +#define NO_UPDATE_RPTR (1 << 27) +#define UNORD_DISPATCH (1 << 28) +#define ROQ_PQ_IB_FLIP (1 << 29) +#define PRIV_STATE (1 << 30) +#define KMD_QUEUE (1 << 31) + +#define DEFAULT_RPTR_BLOCK_SIZE RPTR_BLOCK_SIZE(5) +#define DEFAULT_MIN_AVAIL_SIZE MIN_AVAIL_SIZE(3) + +#define CP_HQD_IB_BASE_ADDR 0xC95Cu +#define CP_HQD_IB_BASE_ADDR_HI 0xC960u +#define CP_HQD_IB_RPTR 0xC964u +#define CP_HQD_IB_CONTROL 0xC968u +#define IB_ATC_EN (1U << 23) +#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20) + +#define CP_HQD_DEQUEUE_REQUEST 0xC974 +#define DEQUEUE_REQUEST_DRAIN 1 +#define DEQUEUE_REQUEST_RESET 2 +#define DEQUEUE_INT (1U << 8) + +#define CP_HQD_SEMA_CMD 0xC97Cu +#define CP_HQD_MSG_TYPE 0xC980u +#define CP_HQD_ATOMIC0_PREOP_LO 0xC984u +#define CP_HQD_ATOMIC0_PREOP_HI 0xC988u +#define CP_HQD_ATOMIC1_PREOP_LO 0xC98Cu +#define CP_HQD_ATOMIC1_PREOP_HI 0xC990u +#define CP_HQD_HQ_SCHEDULER0 0xC994u +#define CP_HQD_HQ_SCHEDULER1 0xC998u + + +#define CP_MQD_CONTROL 0xC99C +#define MQD_VMID(x) ((x) << 0) +#define MQD_VMID_MASK (0xf << 0) +#define MQD_CONTROL_PRIV_STATE_EN (1U << 8) + +#define GRBM_GFX_INDEX 0x30800 +#define INSTANCE_INDEX(x) ((x) << 0) +#define SH_INDEX(x) ((x) << 8) +#define SE_INDEX(x) ((x) << 16) +#define SH_BROADCAST_WRITES (1 << 29) +#define INSTANCE_BROADCAST_WRITES (1 << 30) +#define SE_BROADCAST_WRITES (1 << 31) + +#define SQC_CACHES 0x30d20 +#define SQC_POLICY 0x8C38u +#define SQC_VOLATILE 0x8C3Cu + +#define CP_PERFMON_CNTL 0x36020 + +#define ATC_VMID0_PASID_MAPPING 0x339Cu +#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u +#define ATC_VMID_PASID_MAPPING_VALID (1U << 31) + +#define ATC_VM_APERTURE0_CNTL 0x3310u +#define ATS_ACCESS_MODE_NEVER 0 +#define ATS_ACCESS_MODE_ALWAYS 1 + +#define ATC_VM_APERTURE0_CNTL2 0x3318u +#define ATC_VM_APERTURE0_HIGH_ADDR 0x3308u +#define ATC_VM_APERTURE0_LOW_ADDR 0x3300u +#define ATC_VM_APERTURE1_CNTL 0x3314u +#define ATC_VM_APERTURE1_CNTL2 0x331Cu +#define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu +#define ATC_VM_APERTURE1_LOW_ADDR 0x3304u + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c new file mode 100644 index 0000000000000000000000000000000000000000..4f7b275f2f7b0fdbb488055057c3291a1aab3a70 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -0,0 +1,595 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "kfd_priv.h" +#include "kfd_device_queue_manager.h" + +static long kfd_ioctl(struct file *, unsigned int, unsigned long); +static int kfd_open(struct inode *, struct file *); +static int kfd_mmap(struct file *, struct vm_area_struct *); + +static const char kfd_dev_name[] = "kfd"; + +static const struct file_operations kfd_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = kfd_ioctl, + .compat_ioctl = kfd_ioctl, + .open = kfd_open, + .mmap = kfd_mmap, +}; + +static int kfd_char_dev_major = -1; +static struct class *kfd_class; +struct device *kfd_device; + +int kfd_chardev_init(void) +{ + int err = 0; + + kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops); + err = kfd_char_dev_major; + if (err < 0) + goto err_register_chrdev; + + kfd_class = class_create(THIS_MODULE, kfd_dev_name); + err = PTR_ERR(kfd_class); + if (IS_ERR(kfd_class)) + goto err_class_create; + + kfd_device = device_create(kfd_class, NULL, + MKDEV(kfd_char_dev_major, 0), + NULL, kfd_dev_name); + err = PTR_ERR(kfd_device); + if (IS_ERR(kfd_device)) + goto err_device_create; + + return 0; + +err_device_create: + class_destroy(kfd_class); +err_class_create: + unregister_chrdev(kfd_char_dev_major, kfd_dev_name); +err_register_chrdev: + return err; +} + +void kfd_chardev_exit(void) +{ + device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0)); + class_destroy(kfd_class); + unregister_chrdev(kfd_char_dev_major, kfd_dev_name); +} + +struct device *kfd_chardev(void) +{ + return kfd_device; +} + + +static int kfd_open(struct inode *inode, struct file *filep) +{ + struct kfd_process *process; + bool is_32bit_user_mode; + + if (iminor(inode) != 0) + return -ENODEV; + + is_32bit_user_mode = is_compat_task(); + + if (is_32bit_user_mode == true) { + dev_warn(kfd_device, + "Process %d (32-bit) failed to open /dev/kfd\n" + "32-bit processes are not supported by amdkfd\n", + current->pid); + return -EPERM; + } + + process = kfd_create_process(current); + if (IS_ERR(process)) + return PTR_ERR(process); + + process->is_32bit_user_mode = is_32bit_user_mode; + + dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", + process->pasid, process->is_32bit_user_mode); + + kfd_init_apertures(process); + + return 0; +} + +static long kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, + void __user *arg) +{ + struct kfd_ioctl_get_version_args args; + int err = 0; + + args.major_version = KFD_IOCTL_MAJOR_VERSION; + args.minor_version = KFD_IOCTL_MINOR_VERSION; + + if (copy_to_user(arg, &args, sizeof(args))) + err = -EFAULT; + + return err; +} + +static int set_queue_properties_from_user(struct queue_properties *q_properties, + struct kfd_ioctl_create_queue_args *args) +{ + if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { + pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); + return -EINVAL; + } + + if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { + pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); + return -EINVAL; + } + + if ((args->ring_base_address) && + (!access_ok(VERIFY_WRITE, + (const void __user *) args->ring_base_address, + sizeof(uint64_t)))) { + pr_err("kfd: can't access ring base address\n"); + return -EFAULT; + } + + if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { + pr_err("kfd: ring size must be a power of 2 or 0\n"); + return -EINVAL; + } + + if (!access_ok(VERIFY_WRITE, + (const void __user *) args->read_pointer_address, + sizeof(uint32_t))) { + pr_err("kfd: can't access read pointer\n"); + return -EFAULT; + } + + if (!access_ok(VERIFY_WRITE, + (const void __user *) args->write_pointer_address, + sizeof(uint32_t))) { + pr_err("kfd: can't access write pointer\n"); + return -EFAULT; + } + + q_properties->is_interop = false; + q_properties->queue_percent = args->queue_percentage; + q_properties->priority = args->queue_priority; + q_properties->queue_address = args->ring_base_address; + q_properties->queue_size = args->ring_size; + q_properties->read_ptr = (uint32_t *) args->read_pointer_address; + q_properties->write_ptr = (uint32_t *) args->write_pointer_address; + if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || + args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) + q_properties->type = KFD_QUEUE_TYPE_COMPUTE; + else + return -ENOTSUPP; + + if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) + q_properties->format = KFD_QUEUE_FORMAT_AQL; + else + q_properties->format = KFD_QUEUE_FORMAT_PM4; + + pr_debug("Queue Percentage (%d, %d)\n", + q_properties->queue_percent, args->queue_percentage); + + pr_debug("Queue Priority (%d, %d)\n", + q_properties->priority, args->queue_priority); + + pr_debug("Queue Address (0x%llX, 0x%llX)\n", + q_properties->queue_address, args->ring_base_address); + + pr_debug("Queue Size (0x%llX, %u)\n", + q_properties->queue_size, args->ring_size); + + pr_debug("Queue r/w Pointers (0x%llX, 0x%llX)\n", + (uint64_t) q_properties->read_ptr, + (uint64_t) q_properties->write_ptr); + + pr_debug("Queue Format (%d)\n", q_properties->format); + + return 0; +} + +static long kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, + void __user *arg) +{ + struct kfd_ioctl_create_queue_args args; + struct kfd_dev *dev; + int err = 0; + unsigned int queue_id; + struct kfd_process_device *pdd; + struct queue_properties q_properties; + + memset(&q_properties, 0, sizeof(struct queue_properties)); + + if (copy_from_user(&args, arg, sizeof(args))) + return -EFAULT; + + pr_debug("kfd: creating queue ioctl\n"); + + err = set_queue_properties_from_user(&q_properties, &args); + if (err) + return err; + + dev = kfd_device_by_id(args.gpu_id); + if (dev == NULL) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto err_bind_process; + } + + pr_debug("kfd: creating queue for PASID %d on GPU 0x%x\n", + p->pasid, + dev->id); + + err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, 0, + KFD_QUEUE_TYPE_COMPUTE, &queue_id); + if (err != 0) + goto err_create_queue; + + args.queue_id = queue_id; + + /* Return gpu_id as doorbell offset for mmap usage */ + args.doorbell_offset = args.gpu_id << PAGE_SHIFT; + + if (copy_to_user(arg, &args, sizeof(args))) { + err = -EFAULT; + goto err_copy_args_out; + } + + mutex_unlock(&p->mutex); + + pr_debug("kfd: queue id %d was created successfully\n", args.queue_id); + + pr_debug("ring buffer address == 0x%016llX\n", + args.ring_base_address); + + pr_debug("read ptr address == 0x%016llX\n", + args.read_pointer_address); + + pr_debug("write ptr address == 0x%016llX\n", + args.write_pointer_address); + + return 0; + +err_copy_args_out: + pqm_destroy_queue(&p->pqm, queue_id); +err_create_queue: +err_bind_process: + mutex_unlock(&p->mutex); + return err; +} + +static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p, + void __user *arg) +{ + int retval; + struct kfd_ioctl_destroy_queue_args args; + + if (copy_from_user(&args, arg, sizeof(args))) + return -EFAULT; + + pr_debug("kfd: destroying queue id %d for PASID %d\n", + args.queue_id, + p->pasid); + + mutex_lock(&p->mutex); + + retval = pqm_destroy_queue(&p->pqm, args.queue_id); + + mutex_unlock(&p->mutex); + return retval; +} + +static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, + void __user *arg) +{ + int retval; + struct kfd_ioctl_update_queue_args args; + struct queue_properties properties; + + if (copy_from_user(&args, arg, sizeof(args))) + return -EFAULT; + + if (args.queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { + pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); + return -EINVAL; + } + + if (args.queue_priority > KFD_MAX_QUEUE_PRIORITY) { + pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); + return -EINVAL; + } + + if ((args.ring_base_address) && + (!access_ok(VERIFY_WRITE, + (const void __user *) args.ring_base_address, + sizeof(uint64_t)))) { + pr_err("kfd: can't access ring base address\n"); + return -EFAULT; + } + + if (!is_power_of_2(args.ring_size) && (args.ring_size != 0)) { + pr_err("kfd: ring size must be a power of 2 or 0\n"); + return -EINVAL; + } + + properties.queue_address = args.ring_base_address; + properties.queue_size = args.ring_size; + properties.queue_percent = args.queue_percentage; + properties.priority = args.queue_priority; + + pr_debug("kfd: updating queue id %d for PASID %d\n", + args.queue_id, p->pasid); + + mutex_lock(&p->mutex); + + retval = pqm_update_queue(&p->pqm, args.queue_id, &properties); + + mutex_unlock(&p->mutex); + + return retval; +} + +static long kfd_ioctl_set_memory_policy(struct file *filep, + struct kfd_process *p, void __user *arg) +{ + struct kfd_ioctl_set_memory_policy_args args; + struct kfd_dev *dev; + int err = 0; + struct kfd_process_device *pdd; + enum cache_policy default_policy, alternate_policy; + + if (copy_from_user(&args, arg, sizeof(args))) + return -EFAULT; + + if (args.default_policy != KFD_IOC_CACHE_POLICY_COHERENT + && args.default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { + return -EINVAL; + } + + if (args.alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT + && args.alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) { + return -EINVAL; + } + + dev = kfd_device_by_id(args.gpu_id); + if (dev == NULL) + return -EINVAL; + + mutex_lock(&p->mutex); + + pdd = kfd_bind_process_to_device(dev, p); + if (IS_ERR(pdd)) { + err = PTR_ERR(pdd); + goto out; + } + + default_policy = (args.default_policy == KFD_IOC_CACHE_POLICY_COHERENT) + ? cache_policy_coherent : cache_policy_noncoherent; + + alternate_policy = + (args.alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT) + ? cache_policy_coherent : cache_policy_noncoherent; + + if (!dev->dqm->set_cache_memory_policy(dev->dqm, + &pdd->qpd, + default_policy, + alternate_policy, + (void __user *)args.alternate_aperture_base, + args.alternate_aperture_size)) + err = -EINVAL; + +out: + mutex_unlock(&p->mutex); + + return err; +} + +static long kfd_ioctl_get_clock_counters(struct file *filep, + struct kfd_process *p, void __user *arg) +{ + struct kfd_ioctl_get_clock_counters_args args; + struct kfd_dev *dev; + struct timespec time; + + if (copy_from_user(&args, arg, sizeof(args))) + return -EFAULT; + + dev = kfd_device_by_id(args.gpu_id); + if (dev == NULL) + return -EINVAL; + + /* Reading GPU clock counter from KGD */ + args.gpu_clock_counter = kfd2kgd->get_gpu_clock_counter(dev->kgd); + + /* No access to rdtsc. Using raw monotonic time */ + getrawmonotonic(&time); + args.cpu_clock_counter = (uint64_t)timespec_to_ns(&time); + + get_monotonic_boottime(&time); + args.system_clock_counter = (uint64_t)timespec_to_ns(&time); + + /* Since the counter is in nano-seconds we use 1GHz frequency */ + args.system_clock_freq = 1000000000; + + if (copy_to_user(arg, &args, sizeof(args))) + return -EFAULT; + + return 0; +} + + +static int kfd_ioctl_get_process_apertures(struct file *filp, + struct kfd_process *p, void __user *arg) +{ + struct kfd_ioctl_get_process_apertures_args args; + struct kfd_process_device_apertures *pAperture; + struct kfd_process_device *pdd; + + dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid); + + if (copy_from_user(&args, arg, sizeof(args))) + return -EFAULT; + + args.num_of_nodes = 0; + + mutex_lock(&p->mutex); + + /*if the process-device list isn't empty*/ + if (kfd_has_process_device_data(p)) { + /* Run over all pdd of the process */ + pdd = kfd_get_first_process_device_data(p); + do { + pAperture = &args.process_apertures[args.num_of_nodes]; + pAperture->gpu_id = pdd->dev->id; + pAperture->lds_base = pdd->lds_base; + pAperture->lds_limit = pdd->lds_limit; + pAperture->gpuvm_base = pdd->gpuvm_base; + pAperture->gpuvm_limit = pdd->gpuvm_limit; + pAperture->scratch_base = pdd->scratch_base; + pAperture->scratch_limit = pdd->scratch_limit; + + dev_dbg(kfd_device, + "node id %u\n", args.num_of_nodes); + dev_dbg(kfd_device, + "gpu id %u\n", pdd->dev->id); + dev_dbg(kfd_device, + "lds_base %llX\n", pdd->lds_base); + dev_dbg(kfd_device, + "lds_limit %llX\n", pdd->lds_limit); + dev_dbg(kfd_device, + "gpuvm_base %llX\n", pdd->gpuvm_base); + dev_dbg(kfd_device, + "gpuvm_limit %llX\n", pdd->gpuvm_limit); + dev_dbg(kfd_device, + "scratch_base %llX\n", pdd->scratch_base); + dev_dbg(kfd_device, + "scratch_limit %llX\n", pdd->scratch_limit); + + args.num_of_nodes++; + } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL && + (args.num_of_nodes < NUM_OF_SUPPORTED_GPUS)); + } + + mutex_unlock(&p->mutex); + + if (copy_to_user(arg, &args, sizeof(args))) + return -EFAULT; + + return 0; +} + +static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) +{ + struct kfd_process *process; + long err = -EINVAL; + + dev_dbg(kfd_device, + "ioctl cmd 0x%x (#%d), arg 0x%lx\n", + cmd, _IOC_NR(cmd), arg); + + process = kfd_get_process(current); + if (IS_ERR(process)) + return PTR_ERR(process); + + switch (cmd) { + case KFD_IOC_GET_VERSION: + err = kfd_ioctl_get_version(filep, process, (void __user *)arg); + break; + case KFD_IOC_CREATE_QUEUE: + err = kfd_ioctl_create_queue(filep, process, + (void __user *)arg); + break; + + case KFD_IOC_DESTROY_QUEUE: + err = kfd_ioctl_destroy_queue(filep, process, + (void __user *)arg); + break; + + case KFD_IOC_SET_MEMORY_POLICY: + err = kfd_ioctl_set_memory_policy(filep, process, + (void __user *)arg); + break; + + case KFD_IOC_GET_CLOCK_COUNTERS: + err = kfd_ioctl_get_clock_counters(filep, process, + (void __user *)arg); + break; + + case KFD_IOC_GET_PROCESS_APERTURES: + err = kfd_ioctl_get_process_apertures(filep, process, + (void __user *)arg); + break; + + case KFD_IOC_UPDATE_QUEUE: + err = kfd_ioctl_update_queue(filep, process, + (void __user *)arg); + break; + + default: + dev_err(kfd_device, + "unknown ioctl cmd 0x%x, arg 0x%lx)\n", + cmd, arg); + err = -EINVAL; + break; + } + + if (err < 0) + dev_err(kfd_device, + "ioctl error %ld for ioctl cmd 0x%x (#%d)\n", + err, cmd, _IOC_NR(cmd)); + + return err; +} + +static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct kfd_process *process; + + process = kfd_get_process(current); + if (IS_ERR(process)) + return PTR_ERR(process); + + return kfd_doorbell_mmap(process, vma); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h new file mode 100644 index 0000000000000000000000000000000000000000..a374fa3d3ee6f28ad009cc30d4217df003248caf --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -0,0 +1,294 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef KFD_CRAT_H_INCLUDED +#define KFD_CRAT_H_INCLUDED + +#include + +#pragma pack(1) + +/* + * 4CC signature values for the CRAT and CDIT ACPI tables + */ + +#define CRAT_SIGNATURE "CRAT" +#define CDIT_SIGNATURE "CDIT" + +/* + * Component Resource Association Table (CRAT) + */ + +#define CRAT_OEMID_LENGTH 6 +#define CRAT_OEMTABLEID_LENGTH 8 +#define CRAT_RESERVED_LENGTH 6 + +#define CRAT_OEMID_64BIT_MASK ((1ULL << (CRAT_OEMID_LENGTH * 8)) - 1) + +struct crat_header { + uint32_t signature; + uint32_t length; + uint8_t revision; + uint8_t checksum; + uint8_t oem_id[CRAT_OEMID_LENGTH]; + uint8_t oem_table_id[CRAT_OEMTABLEID_LENGTH]; + uint32_t oem_revision; + uint32_t creator_id; + uint32_t creator_revision; + uint32_t total_entries; + uint16_t num_domains; + uint8_t reserved[CRAT_RESERVED_LENGTH]; +}; + +/* + * The header structure is immediately followed by total_entries of the + * data definitions + */ + +/* + * The currently defined subtype entries in the CRAT + */ +#define CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY 0 +#define CRAT_SUBTYPE_MEMORY_AFFINITY 1 +#define CRAT_SUBTYPE_CACHE_AFFINITY 2 +#define CRAT_SUBTYPE_TLB_AFFINITY 3 +#define CRAT_SUBTYPE_CCOMPUTE_AFFINITY 4 +#define CRAT_SUBTYPE_IOLINK_AFFINITY 5 +#define CRAT_SUBTYPE_MAX 6 + +#define CRAT_SIBLINGMAP_SIZE 32 + +/* + * ComputeUnit Affinity structure and definitions + */ +#define CRAT_CU_FLAGS_ENABLED 0x00000001 +#define CRAT_CU_FLAGS_HOT_PLUGGABLE 0x00000002 +#define CRAT_CU_FLAGS_CPU_PRESENT 0x00000004 +#define CRAT_CU_FLAGS_GPU_PRESENT 0x00000008 +#define CRAT_CU_FLAGS_IOMMU_PRESENT 0x00000010 +#define CRAT_CU_FLAGS_RESERVED 0xffffffe0 + +#define CRAT_COMPUTEUNIT_RESERVED_LENGTH 4 + +struct crat_subtype_computeunit { + uint8_t type; + uint8_t length; + uint16_t reserved; + uint32_t flags; + uint32_t proximity_domain; + uint32_t processor_id_low; + uint16_t num_cpu_cores; + uint16_t num_simd_cores; + uint16_t max_waves_simd; + uint16_t io_count; + uint16_t hsa_capability; + uint16_t lds_size_in_kb; + uint8_t wave_front_size; + uint8_t num_banks; + uint16_t micro_engine_id; + uint8_t num_arrays; + uint8_t num_cu_per_array; + uint8_t num_simd_per_cu; + uint8_t max_slots_scatch_cu; + uint8_t reserved2[CRAT_COMPUTEUNIT_RESERVED_LENGTH]; +}; + +/* + * HSA Memory Affinity structure and definitions + */ +#define CRAT_MEM_FLAGS_ENABLED 0x00000001 +#define CRAT_MEM_FLAGS_HOT_PLUGGABLE 0x00000002 +#define CRAT_MEM_FLAGS_NON_VOLATILE 0x00000004 +#define CRAT_MEM_FLAGS_RESERVED 0xfffffff8 + +#define CRAT_MEMORY_RESERVED_LENGTH 8 + +struct crat_subtype_memory { + uint8_t type; + uint8_t length; + uint16_t reserved; + uint32_t flags; + uint32_t promixity_domain; + uint32_t base_addr_low; + uint32_t base_addr_high; + uint32_t length_low; + uint32_t length_high; + uint32_t width; + uint8_t reserved2[CRAT_MEMORY_RESERVED_LENGTH]; +}; + +/* + * HSA Cache Affinity structure and definitions + */ +#define CRAT_CACHE_FLAGS_ENABLED 0x00000001 +#define CRAT_CACHE_FLAGS_DATA_CACHE 0x00000002 +#define CRAT_CACHE_FLAGS_INST_CACHE 0x00000004 +#define CRAT_CACHE_FLAGS_CPU_CACHE 0x00000008 +#define CRAT_CACHE_FLAGS_SIMD_CACHE 0x00000010 +#define CRAT_CACHE_FLAGS_RESERVED 0xffffffe0 + +#define CRAT_CACHE_RESERVED_LENGTH 8 + +struct crat_subtype_cache { + uint8_t type; + uint8_t length; + uint16_t reserved; + uint32_t flags; + uint32_t processor_id_low; + uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; + uint32_t cache_size; + uint8_t cache_level; + uint8_t lines_per_tag; + uint16_t cache_line_size; + uint8_t associativity; + uint8_t cache_properties; + uint16_t cache_latency; + uint8_t reserved2[CRAT_CACHE_RESERVED_LENGTH]; +}; + +/* + * HSA TLB Affinity structure and definitions + */ +#define CRAT_TLB_FLAGS_ENABLED 0x00000001 +#define CRAT_TLB_FLAGS_DATA_TLB 0x00000002 +#define CRAT_TLB_FLAGS_INST_TLB 0x00000004 +#define CRAT_TLB_FLAGS_CPU_TLB 0x00000008 +#define CRAT_TLB_FLAGS_SIMD_TLB 0x00000010 +#define CRAT_TLB_FLAGS_RESERVED 0xffffffe0 + +#define CRAT_TLB_RESERVED_LENGTH 4 + +struct crat_subtype_tlb { + uint8_t type; + uint8_t length; + uint16_t reserved; + uint32_t flags; + uint32_t processor_id_low; + uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; + uint32_t tlb_level; + uint8_t data_tlb_associativity_2mb; + uint8_t data_tlb_size_2mb; + uint8_t instruction_tlb_associativity_2mb; + uint8_t instruction_tlb_size_2mb; + uint8_t data_tlb_associativity_4k; + uint8_t data_tlb_size_4k; + uint8_t instruction_tlb_associativity_4k; + uint8_t instruction_tlb_size_4k; + uint8_t data_tlb_associativity_1gb; + uint8_t data_tlb_size_1gb; + uint8_t instruction_tlb_associativity_1gb; + uint8_t instruction_tlb_size_1gb; + uint8_t reserved2[CRAT_TLB_RESERVED_LENGTH]; +}; + +/* + * HSA CCompute/APU Affinity structure and definitions + */ +#define CRAT_CCOMPUTE_FLAGS_ENABLED 0x00000001 +#define CRAT_CCOMPUTE_FLAGS_RESERVED 0xfffffffe + +#define CRAT_CCOMPUTE_RESERVED_LENGTH 16 + +struct crat_subtype_ccompute { + uint8_t type; + uint8_t length; + uint16_t reserved; + uint32_t flags; + uint32_t processor_id_low; + uint8_t sibling_map[CRAT_SIBLINGMAP_SIZE]; + uint32_t apu_size; + uint8_t reserved2[CRAT_CCOMPUTE_RESERVED_LENGTH]; +}; + +/* + * HSA IO Link Affinity structure and definitions + */ +#define CRAT_IOLINK_FLAGS_ENABLED 0x00000001 +#define CRAT_IOLINK_FLAGS_COHERENCY 0x00000002 +#define CRAT_IOLINK_FLAGS_RESERVED 0xfffffffc + +/* + * IO interface types + */ +#define CRAT_IOLINK_TYPE_UNDEFINED 0 +#define CRAT_IOLINK_TYPE_HYPERTRANSPORT 1 +#define CRAT_IOLINK_TYPE_PCIEXPRESS 2 +#define CRAT_IOLINK_TYPE_OTHER 3 +#define CRAT_IOLINK_TYPE_MAX 255 + +#define CRAT_IOLINK_RESERVED_LENGTH 24 + +struct crat_subtype_iolink { + uint8_t type; + uint8_t length; + uint16_t reserved; + uint32_t flags; + uint32_t proximity_domain_from; + uint32_t proximity_domain_to; + uint8_t io_interface_type; + uint8_t version_major; + uint16_t version_minor; + uint32_t minimum_latency; + uint32_t maximum_latency; + uint32_t minimum_bandwidth_mbs; + uint32_t maximum_bandwidth_mbs; + uint32_t recommended_transfer_size; + uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH]; +}; + +/* + * HSA generic sub-type header + */ + +#define CRAT_SUBTYPE_FLAGS_ENABLED 0x00000001 + +struct crat_subtype_generic { + uint8_t type; + uint8_t length; + uint16_t reserved; + uint32_t flags; +}; + +/* + * Component Locality Distance Information Table (CDIT) + */ +#define CDIT_OEMID_LENGTH 6 +#define CDIT_OEMTABLEID_LENGTH 8 + +struct cdit_header { + uint32_t signature; + uint32_t length; + uint8_t revision; + uint8_t checksum; + uint8_t oem_id[CDIT_OEMID_LENGTH]; + uint8_t oem_table_id[CDIT_OEMTABLEID_LENGTH]; + uint32_t oem_revision; + uint32_t creator_id; + uint32_t creator_revision; + uint32_t total_entries; + uint16_t num_domains; + uint8_t entry[1]; +}; + +#pragma pack() + +#endif /* KFD_CRAT_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c new file mode 100644 index 0000000000000000000000000000000000000000..43884ebd4303fedd47b56431737a0f726852c552 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -0,0 +1,308 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include "kfd_priv.h" +#include "kfd_device_queue_manager.h" + +#define MQD_SIZE_ALIGNED 768 + +static const struct kfd_device_info kaveri_device_info = { + .max_pasid_bits = 16, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .mqd_size_aligned = MQD_SIZE_ALIGNED +}; + +struct kfd_deviceid { + unsigned short did; + const struct kfd_device_info *device_info; +}; + +/* Please keep this sorted by increasing device id. */ +static const struct kfd_deviceid supported_devices[] = { + { 0x1304, &kaveri_device_info }, /* Kaveri */ + { 0x1305, &kaveri_device_info }, /* Kaveri */ + { 0x1306, &kaveri_device_info }, /* Kaveri */ + { 0x1307, &kaveri_device_info }, /* Kaveri */ + { 0x1309, &kaveri_device_info }, /* Kaveri */ + { 0x130A, &kaveri_device_info }, /* Kaveri */ + { 0x130B, &kaveri_device_info }, /* Kaveri */ + { 0x130C, &kaveri_device_info }, /* Kaveri */ + { 0x130D, &kaveri_device_info }, /* Kaveri */ + { 0x130E, &kaveri_device_info }, /* Kaveri */ + { 0x130F, &kaveri_device_info }, /* Kaveri */ + { 0x1310, &kaveri_device_info }, /* Kaveri */ + { 0x1311, &kaveri_device_info }, /* Kaveri */ + { 0x1312, &kaveri_device_info }, /* Kaveri */ + { 0x1313, &kaveri_device_info }, /* Kaveri */ + { 0x1315, &kaveri_device_info }, /* Kaveri */ + { 0x1316, &kaveri_device_info }, /* Kaveri */ + { 0x1317, &kaveri_device_info }, /* Kaveri */ + { 0x1318, &kaveri_device_info }, /* Kaveri */ + { 0x131B, &kaveri_device_info }, /* Kaveri */ + { 0x131C, &kaveri_device_info }, /* Kaveri */ + { 0x131D, &kaveri_device_info }, /* Kaveri */ +}; + +static const struct kfd_device_info *lookup_device_info(unsigned short did) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(supported_devices); i++) { + if (supported_devices[i].did == did) { + BUG_ON(supported_devices[i].device_info == NULL); + return supported_devices[i].device_info; + } + } + + return NULL; +} + +struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev) +{ + struct kfd_dev *kfd; + + const struct kfd_device_info *device_info = + lookup_device_info(pdev->device); + + if (!device_info) + return NULL; + + kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); + if (!kfd) + return NULL; + + kfd->kgd = kgd; + kfd->device_info = device_info; + kfd->pdev = pdev; + kfd->init_complete = false; + + return kfd; +} + +static bool device_iommu_pasid_init(struct kfd_dev *kfd) +{ + const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | + AMD_IOMMU_DEVICE_FLAG_PRI_SUP | + AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + + struct amd_iommu_device_info iommu_info; + unsigned int pasid_limit; + int err; + + err = amd_iommu_device_info(kfd->pdev, &iommu_info); + if (err < 0) { + dev_err(kfd_device, + "error getting iommu info. is the iommu enabled?\n"); + return false; + } + + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { + dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n", + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0); + return false; + } + + pasid_limit = min_t(unsigned int, + (unsigned int)1 << kfd->device_info->max_pasid_bits, + iommu_info.max_pasids); + /* + * last pasid is used for kernel queues doorbells + * in the future the last pasid might be used for a kernel thread. + */ + pasid_limit = min_t(unsigned int, + pasid_limit, + kfd->doorbell_process_limit - 1); + + err = amd_iommu_init_device(kfd->pdev, pasid_limit); + if (err < 0) { + dev_err(kfd_device, "error initializing iommu device\n"); + return false; + } + + if (!kfd_set_pasid_limit(pasid_limit)) { + dev_err(kfd_device, "error setting pasid limit\n"); + amd_iommu_free_device(kfd->pdev); + return false; + } + + return true; +} + +static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) +{ + struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); + + if (dev) + kfd_unbind_process_from_device(dev, pasid); +} + +bool kgd2kfd_device_init(struct kfd_dev *kfd, + const struct kgd2kfd_shared_resources *gpu_resources) +{ + unsigned int size; + + kfd->shared_resources = *gpu_resources; + + /* calculate max size of mqds needed for queues */ + size = max_num_of_processes * + max_num_of_queues_per_process * + kfd->device_info->mqd_size_aligned; + + /* add another 512KB for all other allocations on gart */ + size += 512 * 1024; + + if (kfd2kgd->init_sa_manager(kfd->kgd, size)) { + dev_err(kfd_device, + "Error initializing sa manager for device (%x:%x)\n", + kfd->pdev->vendor, kfd->pdev->device); + goto out; + } + + kfd_doorbell_init(kfd); + + if (kfd_topology_add_device(kfd) != 0) { + dev_err(kfd_device, + "Error adding device (%x:%x) to topology\n", + kfd->pdev->vendor, kfd->pdev->device); + goto kfd_topology_add_device_error; + } + + if (kfd_interrupt_init(kfd)) { + dev_err(kfd_device, + "Error initializing interrupts for device (%x:%x)\n", + kfd->pdev->vendor, kfd->pdev->device); + goto kfd_interrupt_error; + } + + if (!device_iommu_pasid_init(kfd)) { + dev_err(kfd_device, + "Error initializing iommuv2 for device (%x:%x)\n", + kfd->pdev->vendor, kfd->pdev->device); + goto device_iommu_pasid_error; + } + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, + iommu_pasid_shutdown_callback); + + kfd->dqm = device_queue_manager_init(kfd); + if (!kfd->dqm) { + dev_err(kfd_device, + "Error initializing queue manager for device (%x:%x)\n", + kfd->pdev->vendor, kfd->pdev->device); + goto device_queue_manager_error; + } + + if (kfd->dqm->start(kfd->dqm) != 0) { + dev_err(kfd_device, + "Error starting queuen manager for device (%x:%x)\n", + kfd->pdev->vendor, kfd->pdev->device); + goto dqm_start_error; + } + + kfd->init_complete = true; + dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor, + kfd->pdev->device); + + pr_debug("kfd: Starting kfd with the following scheduling policy %d\n", + sched_policy); + + goto out; + +dqm_start_error: + device_queue_manager_uninit(kfd->dqm); +device_queue_manager_error: + amd_iommu_free_device(kfd->pdev); +device_iommu_pasid_error: + kfd_interrupt_exit(kfd); +kfd_interrupt_error: + kfd_topology_remove_device(kfd); +kfd_topology_add_device_error: + kfd2kgd->fini_sa_manager(kfd->kgd); + dev_err(kfd_device, + "device (%x:%x) NOT added due to errors\n", + kfd->pdev->vendor, kfd->pdev->device); +out: + return kfd->init_complete; +} + +void kgd2kfd_device_exit(struct kfd_dev *kfd) +{ + if (kfd->init_complete) { + device_queue_manager_uninit(kfd->dqm); + amd_iommu_free_device(kfd->pdev); + kfd_interrupt_exit(kfd); + kfd_topology_remove_device(kfd); + } + + kfree(kfd); +} + +void kgd2kfd_suspend(struct kfd_dev *kfd) +{ + BUG_ON(kfd == NULL); + + if (kfd->init_complete) { + kfd->dqm->stop(kfd->dqm); + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); + amd_iommu_free_device(kfd->pdev); + } +} + +int kgd2kfd_resume(struct kfd_dev *kfd) +{ + unsigned int pasid_limit; + int err; + + BUG_ON(kfd == NULL); + + pasid_limit = kfd_get_pasid_limit(); + + if (kfd->init_complete) { + err = amd_iommu_init_device(kfd->pdev, pasid_limit); + if (err < 0) + return -ENXIO; + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, + iommu_pasid_shutdown_callback); + kfd->dqm->start(kfd->dqm); + } + + return 0; +} + +/* This is called directly from KGD at ISR. */ +void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) +{ + if (kfd->init_complete) { + spin_lock(&kfd->interrupt_lock); + + if (kfd->interrupts_active + && enqueue_ih_ring_entry(kfd, ih_ring_entry)) + schedule_work(&kfd->interrupt_work); + + spin_unlock(&kfd->interrupt_lock); + } +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c new file mode 100644 index 0000000000000000000000000000000000000000..924e90c072e513180ec8991b50333f2af663a3f5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -0,0 +1,1062 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include "kfd_priv.h" +#include "kfd_device_queue_manager.h" +#include "kfd_mqd_manager.h" +#include "cik_regs.h" +#include "kfd_kernel_queue.h" +#include "../../radeon/cik_reg.h" + +/* Size of the per-pipe EOP queue */ +#define CIK_HPD_EOP_BYTES_LOG2 11 +#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) + +static bool is_mem_initialized; + +static int init_memory(struct device_queue_manager *dqm); +static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, + unsigned int pasid, unsigned int vmid); + +static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd); +static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock); +static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock); + + +static inline unsigned int get_pipes_num(struct device_queue_manager *dqm) +{ + BUG_ON(!dqm || !dqm->dev); + return dqm->dev->shared_resources.compute_pipe_count; +} + +static inline unsigned int get_first_pipe(struct device_queue_manager *dqm) +{ + BUG_ON(!dqm); + return dqm->dev->shared_resources.first_compute_pipe; +} + +static inline unsigned int get_pipes_num_cpsch(void) +{ + return PIPE_PER_ME_CP_SCHEDULING; +} + +static inline unsigned int +get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) +{ + uint32_t nybble; + + nybble = (pdd->lds_base >> 60) & 0x0E; + + return nybble; + +} + +static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) +{ + unsigned int shared_base; + + shared_base = (pdd->lds_base >> 16) & 0xFF; + + return shared_base; +} + +static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble); +static void init_process_memory(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + unsigned int temp; + + BUG_ON(!dqm || !qpd); + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | + DEFAULT_MTYPE(MTYPE_NONCACHED) | + APE1_MTYPE(MTYPE_NONCACHED); + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + if (qpd->pqm->process->is_32bit_user_mode) { + temp = get_sh_mem_bases_32(pdd); + qpd->sh_mem_bases = SHARED_BASE(temp); + qpd->sh_mem_config |= PTR32; + } else { + temp = get_sh_mem_bases_nybble_64(pdd); + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); + } + + pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", + qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); +} + +static void program_sh_mem_settings(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid, + qpd->sh_mem_config, + qpd->sh_mem_ape1_base, + qpd->sh_mem_ape1_limit, + qpd->sh_mem_bases); +} + +static int allocate_vmid(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q) +{ + int bit, allocated_vmid; + + if (dqm->vmid_bitmap == 0) + return -ENOMEM; + + bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM); + clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap); + + /* Kaveri kfd vmid's starts from vmid 8 */ + allocated_vmid = bit + KFD_VMID_START_OFFSET; + pr_debug("kfd: vmid allocation %d\n", allocated_vmid); + qpd->vmid = allocated_vmid; + q->properties.vmid = allocated_vmid; + + set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid); + program_sh_mem_settings(dqm, qpd); + + return 0; +} + +static void deallocate_vmid(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q) +{ + int bit = qpd->vmid - KFD_VMID_START_OFFSET; + + set_bit(bit, (unsigned long *)&dqm->vmid_bitmap); + qpd->vmid = 0; + q->properties.vmid = 0; +} + +static int create_queue_nocpsch(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd, + int *allocated_vmid) +{ + int retval; + + BUG_ON(!dqm || !q || !qpd || !allocated_vmid); + + pr_debug("kfd: In func %s\n", __func__); + print_queue(q); + + mutex_lock(&dqm->lock); + + if (list_empty(&qpd->queues_list)) { + retval = allocate_vmid(dqm, qpd, q); + if (retval != 0) { + mutex_unlock(&dqm->lock); + return retval; + } + } + *allocated_vmid = qpd->vmid; + q->properties.vmid = qpd->vmid; + + retval = create_compute_queue_nocpsch(dqm, q, qpd); + + if (retval != 0) { + if (list_empty(&qpd->queues_list)) { + deallocate_vmid(dqm, qpd, q); + *allocated_vmid = 0; + } + mutex_unlock(&dqm->lock); + return retval; + } + + list_add(&q->list, &qpd->queues_list); + dqm->queue_count++; + + mutex_unlock(&dqm->lock); + return 0; +} + +static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) +{ + bool set; + int pipe, bit; + + set = false; + + for (pipe = dqm->next_pipe_to_allocate; pipe < get_pipes_num(dqm); + pipe = (pipe + 1) % get_pipes_num(dqm)) { + if (dqm->allocated_queues[pipe] != 0) { + bit = find_first_bit( + (unsigned long *)&dqm->allocated_queues[pipe], + QUEUES_PER_PIPE); + + clear_bit(bit, + (unsigned long *)&dqm->allocated_queues[pipe]); + q->pipe = pipe; + q->queue = bit; + set = true; + break; + } + } + + if (set == false) + return -EBUSY; + + pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n", + __func__, q->pipe, q->queue); + /* horizontal hqd allocation */ + dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm); + + return 0; +} + +static inline void deallocate_hqd(struct device_queue_manager *dqm, + struct queue *q) +{ + set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]); +} + +static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd) +{ + int retval; + struct mqd_manager *mqd; + + BUG_ON(!dqm || !q || !qpd); + + mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); + if (mqd == NULL) + return -ENOMEM; + + retval = allocate_hqd(dqm, q); + if (retval != 0) + return retval; + + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (retval != 0) { + deallocate_hqd(dqm, q); + return retval; + } + + return 0; +} + +static int destroy_queue_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q) +{ + int retval; + struct mqd_manager *mqd; + + BUG_ON(!dqm || !q || !q->mqd || !qpd); + + retval = 0; + + pr_debug("kfd: In Func %s\n", __func__); + + mutex_lock(&dqm->lock); + mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); + if (mqd == NULL) { + retval = -ENOMEM; + goto out; + } + + retval = mqd->destroy_mqd(mqd, q->mqd, + KFD_PREEMPT_TYPE_WAVEFRONT, + QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, + q->pipe, q->queue); + + if (retval != 0) + goto out; + + deallocate_hqd(dqm, q); + + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + + list_del(&q->list); + if (list_empty(&qpd->queues_list)) + deallocate_vmid(dqm, qpd, q); + dqm->queue_count--; +out: + mutex_unlock(&dqm->lock); + return retval; +} + +static int update_queue(struct device_queue_manager *dqm, struct queue *q) +{ + int retval; + struct mqd_manager *mqd; + + BUG_ON(!dqm || !q || !q->mqd); + + mutex_lock(&dqm->lock); + mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); + if (mqd == NULL) { + mutex_unlock(&dqm->lock); + return -ENOMEM; + } + + retval = mqd->update_mqd(mqd, q->mqd, &q->properties); + if (q->properties.is_active == true) + dqm->queue_count++; + else + dqm->queue_count--; + + if (sched_policy != KFD_SCHED_POLICY_NO_HWS) + retval = execute_queues_cpsch(dqm, false); + + mutex_unlock(&dqm->lock); + return retval; +} + +static struct mqd_manager *get_mqd_manager_nocpsch( + struct device_queue_manager *dqm, enum KFD_MQD_TYPE type) +{ + struct mqd_manager *mqd; + + BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX); + + pr_debug("kfd: In func %s mqd type %d\n", __func__, type); + + mqd = dqm->mqds[type]; + if (!mqd) { + mqd = mqd_manager_init(type, dqm->dev); + if (mqd == NULL) + pr_err("kfd: mqd manager is NULL"); + dqm->mqds[type] = mqd; + } + + return mqd; +} + +static int register_process_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct device_process_node *n; + + BUG_ON(!dqm || !qpd); + + pr_debug("kfd: In func %s\n", __func__); + + n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL); + if (!n) + return -ENOMEM; + + n->qpd = qpd; + + mutex_lock(&dqm->lock); + list_add(&n->list, &dqm->queues); + + init_process_memory(dqm, qpd); + dqm->processes_count++; + + mutex_unlock(&dqm->lock); + + return 0; +} + +static int unregister_process_nocpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + int retval; + struct device_process_node *cur, *next; + + BUG_ON(!dqm || !qpd); + + BUG_ON(!list_empty(&qpd->queues_list)); + + pr_debug("kfd: In func %s\n", __func__); + + retval = 0; + mutex_lock(&dqm->lock); + + list_for_each_entry_safe(cur, next, &dqm->queues, list) { + if (qpd == cur->qpd) { + list_del(&cur->list); + kfree(cur); + dqm->processes_count--; + goto out; + } + } + /* qpd not found in dqm list */ + retval = 1; +out: + mutex_unlock(&dqm->lock); + return retval; +} + +static int +set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, + unsigned int vmid) +{ + uint32_t pasid_mapping; + + pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID_PASID_MAPPING_VALID; + return kfd2kgd->set_pasid_vmid_mapping(dqm->dev->kgd, pasid_mapping, + vmid); +} + +static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) +{ + /* In 64-bit mode, we can only control the top 3 bits of the LDS, + * scratch and GPUVM apertures. + * The hardware fills in the remaining 59 bits according to the + * following pattern: + * LDS: X0000000'00000000 - X0000001'00000000 (4GB) + * Scratch: X0000001'00000000 - X0000002'00000000 (4GB) + * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB) + * + * (where X/Y is the configurable nybble with the low-bit 0) + * + * LDS and scratch will have the same top nybble programmed in the + * top 3 bits of SH_MEM_BASES.PRIVATE_BASE. + * GPUVM can have a different top nybble programmed in the + * top 3 bits of SH_MEM_BASES.SHARED_BASE. + * We don't bother to support different top nybbles + * for LDS/Scratch and GPUVM. + */ + + BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || + top_address_nybble == 0); + + return PRIVATE_BASE(top_address_nybble << 12) | + SHARED_BASE(top_address_nybble << 12); +} + +static int init_memory(struct device_queue_manager *dqm) +{ + int i, retval; + + for (i = 8; i < 16; i++) + set_pasid_vmid_mapping(dqm, 0, i); + + retval = kfd2kgd->init_memory(dqm->dev->kgd); + if (retval == 0) + is_mem_initialized = true; + return retval; +} + + +static int init_pipelines(struct device_queue_manager *dqm, + unsigned int pipes_num, unsigned int first_pipe) +{ + void *hpdptr; + struct mqd_manager *mqd; + unsigned int i, err, inx; + uint64_t pipe_hpd_addr; + + BUG_ON(!dqm || !dqm->dev); + + pr_debug("kfd: In func %s\n", __func__); + + /* + * Allocate memory for the HPDs. This is hardware-owned per-pipe data. + * The driver never accesses this memory after zeroing it. + * It doesn't even have to be saved/restored on suspend/resume + * because it contains no data when there are no active queues. + */ + + err = kfd2kgd->allocate_mem(dqm->dev->kgd, + CIK_HPD_EOP_BYTES * pipes_num, + PAGE_SIZE, + KFD_MEMPOOL_SYSTEM_WRITECOMBINE, + (struct kgd_mem **) &dqm->pipeline_mem); + + if (err) { + pr_err("kfd: error allocate vidmem num pipes: %d\n", + pipes_num); + return -ENOMEM; + } + + hpdptr = dqm->pipeline_mem->cpu_ptr; + dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr; + + memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num); + + mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE); + if (mqd == NULL) { + kfd2kgd->free_mem(dqm->dev->kgd, + (struct kgd_mem *) dqm->pipeline_mem); + return -ENOMEM; + } + + for (i = 0; i < pipes_num; i++) { + inx = i + first_pipe; + pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES; + pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr); + /* = log2(bytes/4)-1 */ + kfd2kgd->init_pipeline(dqm->dev->kgd, i, + CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr); + } + + return 0; +} + + +static int init_scheduler(struct device_queue_manager *dqm) +{ + int retval; + + BUG_ON(!dqm); + + pr_debug("kfd: In %s\n", __func__); + + retval = init_pipelines(dqm, get_pipes_num(dqm), KFD_DQM_FIRST_PIPE); + if (retval != 0) + return retval; + + retval = init_memory(dqm); + + return retval; +} + +static int initialize_nocpsch(struct device_queue_manager *dqm) +{ + int i; + + BUG_ON(!dqm); + + pr_debug("kfd: In func %s num of pipes: %d\n", + __func__, get_pipes_num(dqm)); + + mutex_init(&dqm->lock); + INIT_LIST_HEAD(&dqm->queues); + dqm->queue_count = dqm->next_pipe_to_allocate = 0; + dqm->allocated_queues = kcalloc(get_pipes_num(dqm), + sizeof(unsigned int), GFP_KERNEL); + if (!dqm->allocated_queues) { + mutex_destroy(&dqm->lock); + return -ENOMEM; + } + + for (i = 0; i < get_pipes_num(dqm); i++) + dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1; + + dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1; + + init_scheduler(dqm); + return 0; +} + +static void uninitialize_nocpsch(struct device_queue_manager *dqm) +{ + int i; + + BUG_ON(!dqm); + + BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0); + + kfree(dqm->allocated_queues); + for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) + kfree(dqm->mqds[i]); + mutex_destroy(&dqm->lock); + kfd2kgd->free_mem(dqm->dev->kgd, + (struct kgd_mem *) dqm->pipeline_mem); +} + +static int start_nocpsch(struct device_queue_manager *dqm) +{ + return 0; +} + +static int stop_nocpsch(struct device_queue_manager *dqm) +{ + return 0; +} + +/* + * Device Queue Manager implementation for cp scheduler + */ + +static int set_sched_resources(struct device_queue_manager *dqm) +{ + struct scheduling_resources res; + unsigned int queue_num, queue_mask; + + BUG_ON(!dqm); + + pr_debug("kfd: In func %s\n", __func__); + + queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE; + queue_mask = (1 << queue_num) - 1; + res.vmid_mask = (1 << VMID_PER_DEVICE) - 1; + res.vmid_mask <<= KFD_VMID_START_OFFSET; + res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE); + res.gws_mask = res.oac_mask = res.gds_heap_base = + res.gds_heap_size = 0; + + pr_debug("kfd: scheduling resources:\n" + " vmid mask: 0x%8X\n" + " queue mask: 0x%8llX\n", + res.vmid_mask, res.queue_mask); + + return pm_send_set_resources(&dqm->packets, &res); +} + +static int initialize_cpsch(struct device_queue_manager *dqm) +{ + int retval; + + BUG_ON(!dqm); + + pr_debug("kfd: In func %s num of pipes: %d\n", + __func__, get_pipes_num_cpsch()); + + mutex_init(&dqm->lock); + INIT_LIST_HEAD(&dqm->queues); + dqm->queue_count = dqm->processes_count = 0; + dqm->active_runlist = false; + retval = init_pipelines(dqm, get_pipes_num(dqm), 0); + if (retval != 0) + goto fail_init_pipelines; + + return 0; + +fail_init_pipelines: + mutex_destroy(&dqm->lock); + return retval; +} + +static int start_cpsch(struct device_queue_manager *dqm) +{ + struct device_process_node *node; + int retval; + + BUG_ON(!dqm); + + retval = 0; + + retval = pm_init(&dqm->packets, dqm); + if (retval != 0) + goto fail_packet_manager_init; + + retval = set_sched_resources(dqm); + if (retval != 0) + goto fail_set_sched_resources; + + pr_debug("kfd: allocating fence memory\n"); + + /* allocate fence memory on the gart */ + retval = kfd2kgd->allocate_mem(dqm->dev->kgd, + sizeof(*dqm->fence_addr), + 32, + KFD_MEMPOOL_SYSTEM_WRITECOMBINE, + (struct kgd_mem **) &dqm->fence_mem); + + if (retval != 0) + goto fail_allocate_vidmem; + + dqm->fence_addr = dqm->fence_mem->cpu_ptr; + dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; + + list_for_each_entry(node, &dqm->queues, list) + if (node->qpd->pqm->process && dqm->dev) + kfd_bind_process_to_device(dqm->dev, + node->qpd->pqm->process); + + execute_queues_cpsch(dqm, true); + + return 0; +fail_allocate_vidmem: +fail_set_sched_resources: + pm_uninit(&dqm->packets); +fail_packet_manager_init: + return retval; +} + +static int stop_cpsch(struct device_queue_manager *dqm) +{ + struct device_process_node *node; + struct kfd_process_device *pdd; + + BUG_ON(!dqm); + + destroy_queues_cpsch(dqm, true); + + list_for_each_entry(node, &dqm->queues, list) { + pdd = qpd_to_pdd(node->qpd); + pdd->bound = false; + } + kfd2kgd->free_mem(dqm->dev->kgd, + (struct kgd_mem *) dqm->fence_mem); + pm_uninit(&dqm->packets); + + return 0; +} + +static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, + struct kernel_queue *kq, + struct qcm_process_device *qpd) +{ + BUG_ON(!dqm || !kq || !qpd); + + pr_debug("kfd: In func %s\n", __func__); + + mutex_lock(&dqm->lock); + list_add(&kq->list, &qpd->priv_queue_list); + dqm->queue_count++; + qpd->is_debug = true; + execute_queues_cpsch(dqm, false); + mutex_unlock(&dqm->lock); + + return 0; +} + +static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, + struct kernel_queue *kq, + struct qcm_process_device *qpd) +{ + BUG_ON(!dqm || !kq); + + pr_debug("kfd: In %s\n", __func__); + + mutex_lock(&dqm->lock); + destroy_queues_cpsch(dqm, false); + list_del(&kq->list); + dqm->queue_count--; + qpd->is_debug = false; + execute_queues_cpsch(dqm, false); + mutex_unlock(&dqm->lock); +} + +static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd, int *allocate_vmid) +{ + int retval; + struct mqd_manager *mqd; + + BUG_ON(!dqm || !q || !qpd); + + retval = 0; + + if (allocate_vmid) + *allocate_vmid = 0; + + mutex_lock(&dqm->lock); + + mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP); + if (mqd == NULL) { + mutex_unlock(&dqm->lock); + return -ENOMEM; + } + + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, + &q->gart_mqd_addr, &q->properties); + if (retval != 0) + goto out; + + list_add(&q->list, &qpd->queues_list); + if (q->properties.is_active) { + dqm->queue_count++; + retval = execute_queues_cpsch(dqm, false); + } + +out: + mutex_unlock(&dqm->lock); + return retval; +} + +static int fence_wait_timeout(unsigned int *fence_addr, + unsigned int fence_value, + unsigned long timeout) +{ + BUG_ON(!fence_addr); + timeout += jiffies; + + while (*fence_addr != fence_value) { + if (time_after(jiffies, timeout)) { + pr_err("kfd: qcm fence wait loop timeout expired\n"); + return -ETIME; + } + cpu_relax(); + } + + return 0; +} + +static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock) +{ + int retval; + + BUG_ON(!dqm); + + retval = 0; + + if (lock) + mutex_lock(&dqm->lock); + if (dqm->active_runlist == false) + goto out; + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, + KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0); + if (retval != 0) + goto out; + + *dqm->fence_addr = KFD_FENCE_INIT; + pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr, + KFD_FENCE_COMPLETED); + /* should be timed out */ + fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, + QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); + pm_release_ib(&dqm->packets); + dqm->active_runlist = false; + +out: + if (lock) + mutex_unlock(&dqm->lock); + return retval; +} + +static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock) +{ + int retval; + + BUG_ON(!dqm); + + if (lock) + mutex_lock(&dqm->lock); + + retval = destroy_queues_cpsch(dqm, false); + if (retval != 0) { + pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption"); + goto out; + } + + if (dqm->queue_count <= 0 || dqm->processes_count <= 0) { + retval = 0; + goto out; + } + + if (dqm->active_runlist) { + retval = 0; + goto out; + } + + retval = pm_send_runlist(&dqm->packets, &dqm->queues); + if (retval != 0) { + pr_err("kfd: failed to execute runlist"); + goto out; + } + dqm->active_runlist = true; + +out: + if (lock) + mutex_unlock(&dqm->lock); + return retval; +} + +static int destroy_queue_cpsch(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q) +{ + int retval; + struct mqd_manager *mqd; + + BUG_ON(!dqm || !qpd || !q); + + retval = 0; + + /* remove queue from list to prevent rescheduling after preemption */ + mutex_lock(&dqm->lock); + + mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP); + if (!mqd) { + retval = -ENOMEM; + goto failed; + } + + list_del(&q->list); + dqm->queue_count--; + + execute_queues_cpsch(dqm, false); + + mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); + + mutex_unlock(&dqm->lock); + + return 0; + +failed: + mutex_unlock(&dqm->lock); + return retval; +} + +/* + * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to + * stay in user mode. + */ +#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL +/* APE1 limit is inclusive and 64K aligned. */ +#define APE1_LIMIT_ALIGNMENT 0xFFFF + +static bool set_cache_memory_policy(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size) +{ + uint32_t default_mtype; + uint32_t ape1_mtype; + + pr_debug("kfd: In func %s\n", __func__); + + mutex_lock(&dqm->lock); + + if (alternate_aperture_size == 0) { + /* base > limit disables APE1 */ + qpd->sh_mem_ape1_base = 1; + qpd->sh_mem_ape1_limit = 0; + } else { + /* + * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, + * SH_MEM_APE1_BASE[31:0], 0x0000 } + * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, + * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } + * Verify that the base and size parameters can be + * represented in this format and convert them. + * Additionally restrict APE1 to user-mode addresses. + */ + + uint64_t base = (uintptr_t)alternate_aperture_base; + uint64_t limit = base + alternate_aperture_size - 1; + + if (limit <= base) + goto out; + + if ((base & APE1_FIXED_BITS_MASK) != 0) + goto out; + + if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) + goto out; + + qpd->sh_mem_ape1_base = base >> 16; + qpd->sh_mem_ape1_limit = limit >> 16; + } + + default_mtype = (default_policy == cache_policy_coherent) ? + MTYPE_NONCACHED : + MTYPE_CACHED; + + ape1_mtype = (alternate_policy == cache_policy_coherent) ? + MTYPE_NONCACHED : + MTYPE_CACHED; + + qpd->sh_mem_config = (qpd->sh_mem_config & PTR32) + | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) + | DEFAULT_MTYPE(default_mtype) + | APE1_MTYPE(ape1_mtype); + + if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) + program_sh_mem_settings(dqm, qpd); + + pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", + qpd->sh_mem_config, qpd->sh_mem_ape1_base, + qpd->sh_mem_ape1_limit); + + mutex_unlock(&dqm->lock); + return true; + +out: + mutex_unlock(&dqm->lock); + return false; +} + +struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) +{ + struct device_queue_manager *dqm; + + BUG_ON(!dev); + + dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL); + if (!dqm) + return NULL; + + dqm->dev = dev; + switch (sched_policy) { + case KFD_SCHED_POLICY_HWS: + case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: + /* initialize dqm for cp scheduling */ + dqm->create_queue = create_queue_cpsch; + dqm->initialize = initialize_cpsch; + dqm->start = start_cpsch; + dqm->stop = stop_cpsch; + dqm->destroy_queue = destroy_queue_cpsch; + dqm->update_queue = update_queue; + dqm->get_mqd_manager = get_mqd_manager_nocpsch; + dqm->register_process = register_process_nocpsch; + dqm->unregister_process = unregister_process_nocpsch; + dqm->uninitialize = uninitialize_nocpsch; + dqm->create_kernel_queue = create_kernel_queue_cpsch; + dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch; + dqm->set_cache_memory_policy = set_cache_memory_policy; + break; + case KFD_SCHED_POLICY_NO_HWS: + /* initialize dqm for no cp scheduling */ + dqm->start = start_nocpsch; + dqm->stop = stop_nocpsch; + dqm->create_queue = create_queue_nocpsch; + dqm->destroy_queue = destroy_queue_nocpsch; + dqm->update_queue = update_queue; + dqm->get_mqd_manager = get_mqd_manager_nocpsch; + dqm->register_process = register_process_nocpsch; + dqm->unregister_process = unregister_process_nocpsch; + dqm->initialize = initialize_nocpsch; + dqm->uninitialize = uninitialize_nocpsch; + dqm->set_cache_memory_policy = set_cache_memory_policy; + break; + default: + BUG(); + break; + } + + if (dqm->initialize(dqm) != 0) { + kfree(dqm); + return NULL; + } + + return dqm; +} + +void device_queue_manager_uninit(struct device_queue_manager *dqm) +{ + BUG_ON(!dqm); + + dqm->uninitialize(dqm); + kfree(dqm); +} + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..c3f189e8ae35da5527efebe3ae016b47b2b3342c --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -0,0 +1,146 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef KFD_DEVICE_QUEUE_MANAGER_H_ +#define KFD_DEVICE_QUEUE_MANAGER_H_ + +#include +#include +#include "kfd_priv.h" +#include "kfd_mqd_manager.h" + +#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (500) +#define QUEUES_PER_PIPE (8) +#define PIPE_PER_ME_CP_SCHEDULING (3) +#define CIK_VMID_NUM (8) +#define KFD_VMID_START_OFFSET (8) +#define VMID_PER_DEVICE CIK_VMID_NUM +#define KFD_DQM_FIRST_PIPE (0) + +struct device_process_node { + struct qcm_process_device *qpd; + struct list_head list; +}; + +/** + * struct device_queue_manager + * + * @create_queue: Queue creation routine. + * + * @destroy_queue: Queue destruction routine. + * + * @update_queue: Queue update routine. + * + * @get_mqd_manager: Returns the mqd manager according to the mqd type. + * + * @exeute_queues: Dispatches the queues list to the H/W. + * + * @register_process: This routine associates a specific process with device. + * + * @unregister_process: destroys the associations between process to device. + * + * @initialize: Initializes the pipelines and memory module for that device. + * + * @start: Initializes the resources/modules the the device needs for queues + * execution. This function is called on device initialization and after the + * system woke up after suspension. + * + * @stop: This routine stops execution of all the active queue running on the + * H/W and basically this function called on system suspend. + * + * @uninitialize: Destroys all the device queue manager resources allocated in + * initialize routine. + * + * @create_kernel_queue: Creates kernel queue. Used for debug queue. + * + * @destroy_kernel_queue: Destroys kernel queue. Used for debug queue. + * + * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the + * memory apertures. + * + * This struct is a base class for the kfd queues scheduler in the + * device level. The device base class should expose the basic operations + * for queue creation and queue destruction. This base class hides the + * scheduling mode of the driver and the specific implementation of the + * concrete device. This class is the only class in the queues scheduler + * that configures the H/W. + */ + +struct device_queue_manager { + int (*create_queue)(struct device_queue_manager *dqm, + struct queue *q, + struct qcm_process_device *qpd, + int *allocate_vmid); + int (*destroy_queue)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + struct queue *q); + int (*update_queue)(struct device_queue_manager *dqm, + struct queue *q); + + struct mqd_manager * (*get_mqd_manager) + (struct device_queue_manager *dqm, + enum KFD_MQD_TYPE type); + + int (*register_process)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); + int (*unregister_process)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); + int (*initialize)(struct device_queue_manager *dqm); + int (*start)(struct device_queue_manager *dqm); + int (*stop)(struct device_queue_manager *dqm); + void (*uninitialize)(struct device_queue_manager *dqm); + int (*create_kernel_queue)(struct device_queue_manager *dqm, + struct kernel_queue *kq, + struct qcm_process_device *qpd); + void (*destroy_kernel_queue)(struct device_queue_manager *dqm, + struct kernel_queue *kq, + struct qcm_process_device *qpd); + bool (*set_cache_memory_policy)(struct device_queue_manager *dqm, + struct qcm_process_device *qpd, + enum cache_policy default_policy, + enum cache_policy alternate_policy, + void __user *alternate_aperture_base, + uint64_t alternate_aperture_size); + + + struct mqd_manager *mqds[KFD_MQD_TYPE_MAX]; + struct packet_manager packets; + struct kfd_dev *dev; + struct mutex lock; + struct list_head queues; + unsigned int processes_count; + unsigned int queue_count; + unsigned int next_pipe_to_allocate; + unsigned int *allocated_queues; + unsigned int vmid_bitmap; + uint64_t pipelines_addr; + struct kfd_mem_obj *pipeline_mem; + uint64_t fence_gpu_addr; + unsigned int *fence_addr; + struct kfd_mem_obj *fence_mem; + bool active_runlist; +}; + + + +#endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c new file mode 100644 index 0000000000000000000000000000000000000000..b5791a5c7c06156f73121629383ca9618741f853 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -0,0 +1,256 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "kfd_priv.h" +#include +#include +#include +#include + +/* + * This extension supports a kernel level doorbells management for + * the kernel queues. + * Basically the last doorbells page is devoted to kernel queues + * and that's assures that any user process won't get access to the + * kernel doorbells page + */ +static DEFINE_MUTEX(doorbell_mutex); +static unsigned long doorbell_available_index[ + DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)] = { 0 }; + +#define KERNEL_DOORBELL_PASID 1 +#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4 + +/* + * Each device exposes a doorbell aperture, a PCI MMIO aperture that + * receives 32-bit writes that are passed to queues as wptr values. + * The doorbells are intended to be written by applications as part + * of queueing work on user-mode queues. + * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks. + * We map the doorbell address space into user-mode when a process creates + * its first queue on each device. + * Although the mapping is done by KFD, it is equivalent to an mmap of + * the /dev/kfd with the particular device encoded in the mmap offset. + * There will be other uses for mmap of /dev/kfd, so only a range of + * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells. + */ + +/* # of doorbell bytes allocated for each process. */ +static inline size_t doorbell_process_allocation(void) +{ + return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES * + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + PAGE_SIZE); +} + +/* Doorbell calculations for device init. */ +void kfd_doorbell_init(struct kfd_dev *kfd) +{ + size_t doorbell_start_offset; + size_t doorbell_aperture_size; + size_t doorbell_process_limit; + + /* + * We start with calculations in bytes because the input data might + * only be byte-aligned. + * Only after we have done the rounding can we assume any alignment. + */ + + doorbell_start_offset = + roundup(kfd->shared_resources.doorbell_start_offset, + doorbell_process_allocation()); + + doorbell_aperture_size = + rounddown(kfd->shared_resources.doorbell_aperture_size, + doorbell_process_allocation()); + + if (doorbell_aperture_size > doorbell_start_offset) + doorbell_process_limit = + (doorbell_aperture_size - doorbell_start_offset) / + doorbell_process_allocation(); + else + doorbell_process_limit = 0; + + kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + + doorbell_start_offset; + + kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32); + kfd->doorbell_process_limit = doorbell_process_limit - 1; + + kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, + doorbell_process_allocation()); + + BUG_ON(!kfd->doorbell_kernel_ptr); + + pr_debug("kfd: doorbell initialization:\n"); + pr_debug("kfd: doorbell base == 0x%08lX\n", + (uintptr_t)kfd->doorbell_base); + + pr_debug("kfd: doorbell_id_offset == 0x%08lX\n", + kfd->doorbell_id_offset); + + pr_debug("kfd: doorbell_process_limit == 0x%08lX\n", + doorbell_process_limit); + + pr_debug("kfd: doorbell_kernel_offset == 0x%08lX\n", + (uintptr_t)kfd->doorbell_base); + + pr_debug("kfd: doorbell aperture size == 0x%08lX\n", + kfd->shared_resources.doorbell_aperture_size); + + pr_debug("kfd: doorbell kernel address == 0x%08lX\n", + (uintptr_t)kfd->doorbell_kernel_ptr); +} + +int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) +{ + phys_addr_t address; + struct kfd_dev *dev; + + /* + * For simplicitly we only allow mapping of the entire doorbell + * allocation of a single device & process. + */ + if (vma->vm_end - vma->vm_start != doorbell_process_allocation()) + return -EINVAL; + + /* Find kfd device according to gpu id */ + dev = kfd_device_by_id(vma->vm_pgoff); + if (dev == NULL) + return -EINVAL; + + /* Find if pdd exists for combination of process and gpu id */ + if (!kfd_get_process_device_data(dev, process, 0)) + return -EINVAL; + + /* Calculate physical address of doorbell */ + address = kfd_get_process_doorbells(dev, process); + + vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | + VM_DONTDUMP | VM_PFNMAP; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + pr_debug("kfd: mapping doorbell page in kfd_doorbell_mmap\n" + " target user address == 0x%08llX\n" + " physical address == 0x%08llX\n" + " vm_flags == 0x%04lX\n" + " size == 0x%04lX\n", + (unsigned long long) vma->vm_start, address, vma->vm_flags, + doorbell_process_allocation()); + + + return io_remap_pfn_range(vma, + vma->vm_start, + address >> PAGE_SHIFT, + doorbell_process_allocation(), + vma->vm_page_prot); +} + + +/* get kernel iomem pointer for a doorbell */ +u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, + unsigned int *doorbell_off) +{ + u32 inx; + + BUG_ON(!kfd || !doorbell_off); + + mutex_lock(&doorbell_mutex); + inx = find_first_zero_bit(doorbell_available_index, + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); + + __set_bit(inx, doorbell_available_index); + mutex_unlock(&doorbell_mutex); + + if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) + return NULL; + + /* + * Calculating the kernel doorbell offset using "faked" kernel + * pasid that allocated for kernel queues only + */ + *doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation() / + sizeof(u32)) + inx; + + pr_debug("kfd: get kernel queue doorbell\n" + " doorbell offset == 0x%08d\n" + " kernel address == 0x%08lX\n", + *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx)); + + return kfd->doorbell_kernel_ptr + inx; +} + +void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) +{ + unsigned int inx; + + BUG_ON(!kfd || !db_addr); + + inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr); + + mutex_lock(&doorbell_mutex); + __clear_bit(inx, doorbell_available_index); + mutex_unlock(&doorbell_mutex); +} + +inline void write_kernel_doorbell(u32 __iomem *db, u32 value) +{ + if (db) { + writel(value, db); + pr_debug("writing %d to doorbell address 0x%p\n", value, db); + } +} + +/* + * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1 + * to doorbells with the process's doorbell page + */ +unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, + struct kfd_process *process, + unsigned int queue_id) +{ + /* + * doorbell_id_offset accounts for doorbells taken by KGD. + * pasid * doorbell_process_allocation/sizeof(u32) adjusts + * to the process's doorbells + */ + return kfd->doorbell_id_offset + + process->pasid * (doorbell_process_allocation()/sizeof(u32)) + + queue_id; +} + +uint64_t kfd_get_number_elems(struct kfd_dev *kfd) +{ + uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size - + kfd->shared_resources.doorbell_start_offset) / + doorbell_process_allocation() + 1; + + return num_of_elems; + +} + +phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, + struct kfd_process *process) +{ + return dev->doorbell_base + + process->pasid * doorbell_process_allocation(); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c new file mode 100644 index 0000000000000000000000000000000000000000..66df4da01c29a200d64a6a801d8d250231d7d269 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -0,0 +1,356 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "kfd_priv.h" +#include +#include +#include + +/* + * The primary memory I/O features being added for revisions of gfxip + * beyond 7.0 (Kaveri) are: + * + * Access to ATC/IOMMU mapped memory w/ associated extension of VA to 48b + * + * “Flat” shader memory access – These are new shader vector memory + * operations that do not reference a T#/V# so a “pointer” is what is + * sourced from the vector gprs for direct access to memory. + * This pointer space has the Shared(LDS) and Private(Scratch) memory + * mapped into this pointer space as apertures. + * The hardware then determines how to direct the memory request + * based on what apertures the request falls in. + * + * Unaligned support and alignment check + * + * + * System Unified Address - SUA + * + * The standard usage for GPU virtual addresses are that they are mapped by + * a set of page tables we call GPUVM and these page tables are managed by + * a combination of vidMM/driver software components. The current virtual + * address (VA) range for GPUVM is 40b. + * + * As of gfxip7.1 and beyond we’re adding the ability for compute memory + * clients (CP/RLC, DMA, SHADER(ifetch, scalar, and vector ops)) to access + * the same page tables used by host x86 processors and that are managed by + * the operating system. This is via a technique and hardware called ATC/IOMMU. + * The GPU has the capability of accessing both the GPUVM and ATC address + * spaces for a given VMID (process) simultaneously and we call this feature + * system unified address (SUA). + * + * There are three fundamental address modes of operation for a given VMID + * (process) on the GPU: + * + * HSA64 – 64b pointers and the default address space is ATC + * HSA32 – 32b pointers and the default address space is ATC + * GPUVM – 64b pointers and the default address space is GPUVM (driver + * model mode) + * + * + * HSA64 - ATC/IOMMU 64b + * + * A 64b pointer in the AMD64/IA64 CPU architecture is not fully utilized + * by the CPU so an AMD CPU can only access the high area + * (VA[63:47] == 0x1FFFF) and low area (VA[63:47 == 0) of the address space + * so the actual VA carried to translation is 48b. There is a “hole” in + * the middle of the 64b VA space. + * + * The GPU not only has access to all of the CPU accessible address space via + * ATC/IOMMU, but it also has access to the GPUVM address space. The “system + * unified address” feature (SUA) is the mapping of GPUVM and ATC address + * spaces into a unified pointer space. The method we take for 64b mode is + * to map the full 40b GPUVM address space into the hole of the 64b address + * space. + + * The GPUVM_Base/GPUVM_Limit defines the aperture in the 64b space where we + * direct requests to be translated via GPUVM page tables instead of the + * IOMMU path. + * + * + * 64b to 49b Address conversion + * + * Note that there are still significant portions of unused regions (holes) + * in the 64b address space even for the GPU. There are several places in + * the pipeline (sw and hw), we wish to compress the 64b virtual address + * to a 49b address. This 49b address is constituted of an “ATC” bit + * plus a 48b virtual address. This 49b address is what is passed to the + * translation hardware. ATC==0 means the 48b address is a GPUVM address + * (max of 2^40 – 1) intended to be translated via GPUVM page tables. + * ATC==1 means the 48b address is intended to be translated via IOMMU + * page tables. + * + * A 64b pointer is compared to the apertures that are defined (Base/Limit), in + * this case the GPUVM aperture (red) is defined and if a pointer falls in this + * aperture, we subtract the GPUVM_Base address and set the ATC bit to zero + * as part of the 64b to 49b conversion. + * + * Where this 64b to 49b conversion is done is a function of the usage. + * Most GPU memory access is via memory objects where the driver builds + * a descriptor which consists of a base address and a memory access by + * the GPU usually consists of some kind of an offset or Cartesian coordinate + * that references this memory descriptor. This is the case for shader + * instructions that reference the T# or V# constants, or for specified + * locations of assets (ex. the shader program location). In these cases + * the driver is what handles the 64b to 49b conversion and the base + * address in the descriptor (ex. V# or T# or shader program location) + * is defined as a 48b address w/ an ATC bit. For this usage a given + * memory object cannot straddle multiple apertures in the 64b address + * space. For example a shader program cannot jump in/out between ATC + * and GPUVM space. + * + * In some cases we wish to pass a 64b pointer to the GPU hardware and + * the GPU hw does the 64b to 49b conversion before passing memory + * requests to the cache/memory system. This is the case for the + * S_LOAD and FLAT_* shader memory instructions where we have 64b pointers + * in scalar and vector GPRs respectively. + * + * In all cases (no matter where the 64b -> 49b conversion is done), the gfxip + * hardware sends a 48b address along w/ an ATC bit, to the memory controller + * on the memory request interfaces. + * + * _MC_rdreq_atc // read request ATC bit + * + * 0 : _MC_rdreq_addr is a GPUVM VA + * + * 1 : _MC_rdreq_addr is a ATC VA + * + * + * “Spare” aperture (APE1) + * + * We use the GPUVM aperture to differentiate ATC vs. GPUVM, but we also use + * apertures to set the Mtype field for S_LOAD/FLAT_* ops which is input to the + * config tables for setting cache policies. The “spare” (APE1) aperture is + * motivated by getting a different Mtype from the default. + * The default aperture isn’t an actual base/limit aperture; it is just the + * address space that doesn’t hit any defined base/limit apertures. + * The following diagram is a complete picture of the gfxip7.x SUA apertures. + * The APE1 can be placed either below or above + * the hole (cannot be in the hole). + * + * + * General Aperture definitions and rules + * + * An aperture register definition consists of a Base, Limit, Mtype, and + * usually an ATC bit indicating which translation tables that aperture uses. + * In all cases (for SUA and DUA apertures discussed later), aperture base + * and limit definitions are 64KB aligned. + * + * _Base[63:0] = { _Base_register[63:16], 0x0000 } + * + * _Limit[63:0] = { _Limit_register[63:16], 0xFFFF } + * + * The base and limit are considered inclusive to an aperture so being + * inside an aperture means (address >= Base) AND (address <= Limit). + * + * In no case is a payload that straddles multiple apertures expected to work. + * For example a load_dword_x4 that starts in one aperture and ends in another, + * does not work. For the vector FLAT_* ops we have detection capability in + * the shader for reporting a “memory violation” back to the + * SQ block for use in traps. + * A memory violation results when an op falls into the hole, + * or a payload straddles multiple apertures. The S_LOAD instruction + * does not have this detection. + * + * Apertures cannot overlap. + * + * + * + * HSA32 - ATC/IOMMU 32b + * + * For HSA32 mode, the pointers are interpreted as 32 bits and use a single GPR + * instead of two for the S_LOAD and FLAT_* ops. The entire GPUVM space of 40b + * will not fit so there is only partial visibility to the GPUVM + * space (defined by the aperture) for S_LOAD and FLAT_* ops. + * There is no spare (APE1) aperture for HSA32 mode. + * + * + * GPUVM 64b mode (driver model) + * + * This mode is related to HSA64 in that the difference really is that + * the default aperture is GPUVM (ATC==0) and not ATC space. + * We have gfxip7.x hardware that has FLAT_* and S_LOAD support for + * SUA GPUVM mode, but does not support HSA32/HSA64. + * + * + * Device Unified Address - DUA + * + * Device unified address (DUA) is the name of the feature that maps the + * Shared(LDS) memory and Private(Scratch) memory into the overall address + * space for use by the new FLAT_* vector memory ops. The Shared and + * Private memories are mapped as apertures into the address space, + * and the hardware detects when a FLAT_* memory request is to be redirected + * to the LDS or Scratch memory when it falls into one of these apertures. + * Like the SUA apertures, the Shared/Private apertures are 64KB aligned and + * the base/limit is “in” the aperture. For both HSA64 and GPUVM SUA modes, + * the Shared/Private apertures are always placed in a limited selection of + * options in the hole of the 64b address space. For HSA32 mode, the + * Shared/Private apertures can be placed anywhere in the 32b space + * except at 0. + * + * + * HSA64 Apertures for FLAT_* vector ops + * + * For HSA64 SUA mode, the Shared and Private apertures are always placed + * in the hole w/ a limited selection of possible locations. The requests + * that fall in the private aperture are expanded as a function of the + * work-item id (tid) and redirected to the location of the + * “hidden private memory”. The hidden private can be placed in either GPUVM + * or ATC space. The addresses that fall in the shared aperture are + * re-directed to the on-chip LDS memory hardware. + * + * + * HSA32 Apertures for FLAT_* vector ops + * + * In HSA32 mode, the Private and Shared apertures can be placed anywhere + * in the 32b space except at 0 (Private or Shared Base at zero disables + * the apertures). If the base address of the apertures are non-zero + * (ie apertures exists), the size is always 64KB. + * + * + * GPUVM Apertures for FLAT_* vector ops + * + * In GPUVM mode, the Shared/Private apertures are specified identically + * to HSA64 mode where they are always in the hole at a limited selection + * of locations. + * + * + * Aperture Definitions for SUA and DUA + * + * The interpretation of the aperture register definitions for a given + * VMID is a function of the “SUA Mode” which is one of HSA64, HSA32, or + * GPUVM64 discussed in previous sections. The mode is first decoded, and + * then the remaining register decode is a function of the mode. + * + * + * SUA Mode Decode + * + * For the S_LOAD and FLAT_* shader operations, the SUA mode is decoded from + * the COMPUTE_DISPATCH_INITIATOR:DATA_ATC bit and + * the SH_MEM_CONFIG:PTR32 bits. + * + * COMPUTE_DISPATCH_INITIATOR:DATA_ATC SH_MEM_CONFIG:PTR32 Mode + * + * 1 0 HSA64 + * + * 1 1 HSA32 + * + * 0 X GPUVM64 + * + * In general the hardware will ignore the PTR32 bit and treat + * as “0” whenever DATA_ATC = “0”, but sw should set PTR32=0 + * when DATA_ATC=0. + * + * The DATA_ATC bit is only set for compute dispatches. + * All “Draw” dispatches are hardcoded to GPUVM64 mode + * for FLAT_* / S_LOAD operations. + */ + +#define MAKE_GPUVM_APP_BASE(gpu_num) \ + (((uint64_t)(gpu_num) << 61) + 0x1000000000000L) + +#define MAKE_GPUVM_APP_LIMIT(base) \ + (((uint64_t)(base) & \ + 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL) + +#define MAKE_SCRATCH_APP_BASE(gpu_num) \ + (((uint64_t)(gpu_num) << 61) + 0x100000000L) + +#define MAKE_SCRATCH_APP_LIMIT(base) \ + (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) + +#define MAKE_LDS_APP_BASE(gpu_num) \ + (((uint64_t)(gpu_num) << 61) + 0x0) +#define MAKE_LDS_APP_LIMIT(base) \ + (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) + +int kfd_init_apertures(struct kfd_process *process) +{ + uint8_t id = 0; + struct kfd_dev *dev; + struct kfd_process_device *pdd; + + mutex_lock(&process->mutex); + + /*Iterating over all devices*/ + while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL && + id < NUM_OF_SUPPORTED_GPUS) { + + pdd = kfd_get_process_device_data(dev, process, 1); + + /* + * For 64 bit process aperture will be statically reserved in + * the x86_64 non canonical process address space + * amdkfd doesn't currently support apertures for 32 bit process + */ + if (process->is_32bit_user_mode) { + pdd->lds_base = pdd->lds_limit = 0; + pdd->gpuvm_base = pdd->gpuvm_limit = 0; + pdd->scratch_base = pdd->scratch_limit = 0; + } else { + /* + * node id couldn't be 0 - the three MSB bits of + * aperture shoudn't be 0 + */ + pdd->lds_base = MAKE_LDS_APP_BASE(id + 1); + + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); + + pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); + + pdd->gpuvm_limit = + MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base); + + pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1); + + pdd->scratch_limit = + MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); + } + + dev_dbg(kfd_device, "node id %u\n", id); + dev_dbg(kfd_device, "gpu id %u\n", pdd->dev->id); + dev_dbg(kfd_device, "lds_base %llX\n", pdd->lds_base); + dev_dbg(kfd_device, "lds_limit %llX\n", pdd->lds_limit); + dev_dbg(kfd_device, "gpuvm_base %llX\n", pdd->gpuvm_base); + dev_dbg(kfd_device, "gpuvm_limit %llX\n", pdd->gpuvm_limit); + dev_dbg(kfd_device, "scratch_base %llX\n", pdd->scratch_base); + dev_dbg(kfd_device, "scratch_limit %llX\n", pdd->scratch_limit); + + id++; + } + + mutex_unlock(&process->mutex); + + return 0; +} + + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c new file mode 100644 index 0000000000000000000000000000000000000000..5b999095a1f7e58de9b4c0be1e0f6f02a43e71ab --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -0,0 +1,176 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * KFD Interrupts. + * + * AMD GPUs deliver interrupts by pushing an interrupt description onto the + * interrupt ring and then sending an interrupt. KGD receives the interrupt + * in ISR and sends us a pointer to each new entry on the interrupt ring. + * + * We generally can't process interrupt-signaled events from ISR, so we call + * out to each interrupt client module (currently only the scheduler) to ask if + * each interrupt is interesting. If they return true, then it requires further + * processing so we copy it to an internal interrupt ring and call each + * interrupt client again from a work-queue. + * + * There's no acknowledgment for the interrupts we use. The hardware simply + * queues a new interrupt each time without waiting. + * + * The fixed-size internal queue means that it's possible for us to lose + * interrupts because we have no back-pressure to the hardware. + */ + +#include +#include +#include "kfd_priv.h" + +#define KFD_INTERRUPT_RING_SIZE 256 + +static void interrupt_wq(struct work_struct *); + +int kfd_interrupt_init(struct kfd_dev *kfd) +{ + void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE, + kfd->device_info->ih_ring_entry_size, + GFP_KERNEL); + if (!interrupt_ring) + return -ENOMEM; + + kfd->interrupt_ring = interrupt_ring; + kfd->interrupt_ring_size = + KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size; + atomic_set(&kfd->interrupt_ring_wptr, 0); + atomic_set(&kfd->interrupt_ring_rptr, 0); + + spin_lock_init(&kfd->interrupt_lock); + + INIT_WORK(&kfd->interrupt_work, interrupt_wq); + + kfd->interrupts_active = true; + + /* + * After this function returns, the interrupt will be enabled. This + * barrier ensures that the interrupt running on a different processor + * sees all the above writes. + */ + smp_wmb(); + + return 0; +} + +void kfd_interrupt_exit(struct kfd_dev *kfd) +{ + /* + * Stop the interrupt handler from writing to the ring and scheduling + * workqueue items. The spinlock ensures that any interrupt running + * after we have unlocked sees interrupts_active = false. + */ + unsigned long flags; + + spin_lock_irqsave(&kfd->interrupt_lock, flags); + kfd->interrupts_active = false; + spin_unlock_irqrestore(&kfd->interrupt_lock, flags); + + /* + * Flush_scheduled_work ensures that there are no outstanding + * work-queue items that will access interrupt_ring. New work items + * can't be created because we stopped interrupt handling above. + */ + flush_scheduled_work(); + + kfree(kfd->interrupt_ring); +} + +/* + * This assumes that it can't be called concurrently with itself + * but only with dequeue_ih_ring_entry. + */ +bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry) +{ + unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); + unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); + + if ((rptr - wptr) % kfd->interrupt_ring_size == + kfd->device_info->ih_ring_entry_size) { + /* This is very bad, the system is likely to hang. */ + dev_err_ratelimited(kfd_chardev(), + "Interrupt ring overflow, dropping interrupt.\n"); + return false; + } + + memcpy(kfd->interrupt_ring + wptr, ih_ring_entry, + kfd->device_info->ih_ring_entry_size); + + wptr = (wptr + kfd->device_info->ih_ring_entry_size) % + kfd->interrupt_ring_size; + smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */ + atomic_set(&kfd->interrupt_ring_wptr, wptr); + + return true; +} + +/* + * This assumes that it can't be called concurrently with itself + * but only with enqueue_ih_ring_entry. + */ +static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry) +{ + /* + * Assume that wait queues have an implicit barrier, i.e. anything that + * happened in the ISR before it queued work is visible. + */ + + unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr); + unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr); + + if (rptr == wptr) + return false; + + memcpy(ih_ring_entry, kfd->interrupt_ring + rptr, + kfd->device_info->ih_ring_entry_size); + + rptr = (rptr + kfd->device_info->ih_ring_entry_size) % + kfd->interrupt_ring_size; + + /* + * Ensure the rptr write update is not visible until + * memcpy has finished reading. + */ + smp_mb(); + atomic_set(&kfd->interrupt_ring_rptr, rptr); + + return true; +} + +static void interrupt_wq(struct work_struct *work) +{ + struct kfd_dev *dev = container_of(work, struct kfd_dev, + interrupt_work); + + uint32_t ih_ring_entry[DIV_ROUND_UP( + dev->device_info->ih_ring_entry_size, + sizeof(uint32_t))]; + + while (dequeue_ih_ring_entry(dev, ih_ring_entry)) + ; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c new file mode 100644 index 0000000000000000000000000000000000000000..93507141072495b9c7815b4d5cad2cf124b9abe3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -0,0 +1,353 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include +#include "kfd_kernel_queue.h" +#include "kfd_priv.h" +#include "kfd_device_queue_manager.h" +#include "kfd_pm4_headers.h" +#include "kfd_pm4_opcodes.h" + +#define PM4_COUNT_ZERO (((1 << 15) - 1) << 16) + +static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) +{ + struct queue_properties prop; + int retval; + union PM4_MES_TYPE_3_HEADER nop; + + BUG_ON(!kq || !dev); + BUG_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ); + + pr_debug("kfd: In func %s initializing queue type %d size %d\n", + __func__, KFD_QUEUE_TYPE_HIQ, queue_size); + + nop.opcode = IT_NOP; + nop.type = PM4_TYPE_3; + nop.u32all |= PM4_COUNT_ZERO; + + kq->dev = dev; + kq->nop_packet = nop.u32all; + switch (type) { + case KFD_QUEUE_TYPE_DIQ: + case KFD_QUEUE_TYPE_HIQ: + kq->mqd = dev->dqm->get_mqd_manager(dev->dqm, + KFD_MQD_TYPE_CIK_HIQ); + break; + default: + BUG(); + break; + } + + if (kq->mqd == NULL) + return false; + + prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off); + + if (prop.doorbell_ptr == NULL) + goto err_get_kernel_doorbell; + + retval = kfd2kgd->allocate_mem(dev->kgd, + queue_size, + PAGE_SIZE, + KFD_MEMPOOL_SYSTEM_WRITECOMBINE, + (struct kgd_mem **) &kq->pq); + + if (retval != 0) + goto err_pq_allocate_vidmem; + + kq->pq_kernel_addr = kq->pq->cpu_ptr; + kq->pq_gpu_addr = kq->pq->gpu_addr; + + retval = kfd2kgd->allocate_mem(dev->kgd, + sizeof(*kq->rptr_kernel), + 32, + KFD_MEMPOOL_SYSTEM_WRITECOMBINE, + (struct kgd_mem **) &kq->rptr_mem); + + if (retval != 0) + goto err_rptr_allocate_vidmem; + + kq->rptr_kernel = kq->rptr_mem->cpu_ptr; + kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; + + retval = kfd2kgd->allocate_mem(dev->kgd, + sizeof(*kq->wptr_kernel), + 32, + KFD_MEMPOOL_SYSTEM_WRITECOMBINE, + (struct kgd_mem **) &kq->wptr_mem); + + if (retval != 0) + goto err_wptr_allocate_vidmem; + + kq->wptr_kernel = kq->wptr_mem->cpu_ptr; + kq->wptr_gpu_addr = kq->wptr_mem->gpu_addr; + + memset(kq->pq_kernel_addr, 0, queue_size); + memset(kq->rptr_kernel, 0, sizeof(*kq->rptr_kernel)); + memset(kq->wptr_kernel, 0, sizeof(*kq->wptr_kernel)); + + prop.queue_size = queue_size; + prop.is_interop = false; + prop.priority = 1; + prop.queue_percent = 100; + prop.type = type; + prop.vmid = 0; + prop.queue_address = kq->pq_gpu_addr; + prop.read_ptr = (uint32_t *) kq->rptr_gpu_addr; + prop.write_ptr = (uint32_t *) kq->wptr_gpu_addr; + + if (init_queue(&kq->queue, prop) != 0) + goto err_init_queue; + + kq->queue->device = dev; + kq->queue->process = kfd_get_process(current); + + retval = kq->mqd->init_mqd(kq->mqd, &kq->queue->mqd, + &kq->queue->mqd_mem_obj, + &kq->queue->gart_mqd_addr, + &kq->queue->properties); + if (retval != 0) + goto err_init_mqd; + + /* assign HIQ to HQD */ + if (type == KFD_QUEUE_TYPE_HIQ) { + pr_debug("assigning hiq to hqd\n"); + kq->queue->pipe = KFD_CIK_HIQ_PIPE; + kq->queue->queue = KFD_CIK_HIQ_QUEUE; + kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe, + kq->queue->queue, NULL); + } else { + /* allocate fence for DIQ */ + + retval = kfd2kgd->allocate_mem(dev->kgd, + sizeof(uint32_t), + 32, + KFD_MEMPOOL_SYSTEM_WRITECOMBINE, + (struct kgd_mem **) &kq->fence_mem_obj); + + if (retval != 0) + goto err_alloc_fence; + + kq->fence_kernel_address = kq->fence_mem_obj->cpu_ptr; + kq->fence_gpu_addr = kq->fence_mem_obj->gpu_addr; + } + + print_queue(kq->queue); + + return true; +err_alloc_fence: +err_init_mqd: + uninit_queue(kq->queue); +err_init_queue: + kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->wptr_mem); +err_wptr_allocate_vidmem: + kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->rptr_mem); +err_rptr_allocate_vidmem: + kfd2kgd->free_mem(dev->kgd, (struct kgd_mem *) kq->pq); +err_pq_allocate_vidmem: + pr_err("kfd: error init pq\n"); + kfd_release_kernel_doorbell(dev, prop.doorbell_ptr); +err_get_kernel_doorbell: + pr_err("kfd: error init doorbell"); + return false; + +} + +static void uninitialize(struct kernel_queue *kq) +{ + BUG_ON(!kq); + + if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) + kq->mqd->destroy_mqd(kq->mqd, + NULL, + false, + QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS, + kq->queue->pipe, + kq->queue->queue); + + kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->rptr_mem); + kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->wptr_mem); + kfd2kgd->free_mem(kq->dev->kgd, (struct kgd_mem *) kq->pq); + kfd_release_kernel_doorbell(kq->dev, + kq->queue->properties.doorbell_ptr); + uninit_queue(kq->queue); +} + +static int acquire_packet_buffer(struct kernel_queue *kq, + size_t packet_size_in_dwords, unsigned int **buffer_ptr) +{ + size_t available_size; + size_t queue_size_dwords; + uint32_t wptr, rptr; + unsigned int *queue_address; + + BUG_ON(!kq || !buffer_ptr); + + rptr = *kq->rptr_kernel; + wptr = *kq->wptr_kernel; + queue_address = (unsigned int *)kq->pq_kernel_addr; + queue_size_dwords = kq->queue->properties.queue_size / sizeof(uint32_t); + + pr_debug("kfd: In func %s\nrptr: %d\nwptr: %d\nqueue_address 0x%p\n", + __func__, rptr, wptr, queue_address); + + available_size = (rptr - 1 - wptr + queue_size_dwords) % + queue_size_dwords; + + if (packet_size_in_dwords >= queue_size_dwords || + packet_size_in_dwords >= available_size) { + /* + * make sure calling functions know + * acquire_packet_buffer() failed + */ + *buffer_ptr = NULL; + return -ENOMEM; + } + + if (wptr + packet_size_in_dwords >= queue_size_dwords) { + while (wptr > 0) { + queue_address[wptr] = kq->nop_packet; + wptr = (wptr + 1) % queue_size_dwords; + } + } + + *buffer_ptr = &queue_address[wptr]; + kq->pending_wptr = wptr + packet_size_in_dwords; + + return 0; +} + +static void submit_packet(struct kernel_queue *kq) +{ +#ifdef DEBUG + int i; +#endif + + BUG_ON(!kq); + +#ifdef DEBUG + for (i = *kq->wptr_kernel; i < kq->pending_wptr; i++) { + pr_debug("0x%2X ", kq->pq_kernel_addr[i]); + if (i % 15 == 0) + pr_debug("\n"); + } + pr_debug("\n"); +#endif + + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); +} + +static int sync_with_hw(struct kernel_queue *kq, unsigned long timeout_ms) +{ + unsigned long org_timeout_ms; + + BUG_ON(!kq); + + org_timeout_ms = timeout_ms; + timeout_ms += jiffies * 1000 / HZ; + while (*kq->wptr_kernel != *kq->rptr_kernel) { + if (time_after(jiffies * 1000 / HZ, timeout_ms)) { + pr_err("kfd: kernel_queue %s timeout expired %lu\n", + __func__, org_timeout_ms); + pr_err("kfd: wptr: %d rptr: %d\n", + *kq->wptr_kernel, *kq->rptr_kernel); + return -ETIME; + } + schedule(); + } + + return 0; +} + +static void rollback_packet(struct kernel_queue *kq) +{ + BUG_ON(!kq); + kq->pending_wptr = *kq->queue->properties.write_ptr; +} + +struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, + enum kfd_queue_type type) +{ + struct kernel_queue *kq; + + BUG_ON(!dev); + + kq = kzalloc(sizeof(struct kernel_queue), GFP_KERNEL); + if (!kq) + return NULL; + + kq->initialize = initialize; + kq->uninitialize = uninitialize; + kq->acquire_packet_buffer = acquire_packet_buffer; + kq->submit_packet = submit_packet; + kq->sync_with_hw = sync_with_hw; + kq->rollback_packet = rollback_packet; + + if (kq->initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE) == false) { + pr_err("kfd: failed to init kernel queue\n"); + kfree(kq); + return NULL; + } + return kq; +} + +void kernel_queue_uninit(struct kernel_queue *kq) +{ + BUG_ON(!kq); + + kq->uninitialize(kq); + kfree(kq); +} + +static __attribute__((unused)) void test_kq(struct kfd_dev *dev) +{ + struct kernel_queue *kq; + uint32_t *buffer, i; + int retval; + + BUG_ON(!dev); + + pr_debug("kfd: starting kernel queue test\n"); + + kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ); + BUG_ON(!kq); + + retval = kq->acquire_packet_buffer(kq, 5, &buffer); + BUG_ON(retval != 0); + for (i = 0; i < 5; i++) + buffer[i] = kq->nop_packet; + kq->submit_packet(kq); + kq->sync_with_hw(kq, 1000); + + pr_debug("kfd: ending kernel queue test\n"); +} + + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..dcd2bdb68d44f2821dcd9c4bffbb4c658ed6a7e2 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -0,0 +1,69 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef KFD_KERNEL_QUEUE_H_ +#define KFD_KERNEL_QUEUE_H_ + +#include +#include +#include "kfd_priv.h" + +struct kernel_queue { + /* interface */ + bool (*initialize)(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); + void (*uninitialize)(struct kernel_queue *kq); + int (*acquire_packet_buffer)(struct kernel_queue *kq, + size_t packet_size_in_dwords, + unsigned int **buffer_ptr); + + void (*submit_packet)(struct kernel_queue *kq); + int (*sync_with_hw)(struct kernel_queue *kq, + unsigned long timeout_ms); + void (*rollback_packet)(struct kernel_queue *kq); + + /* data */ + struct kfd_dev *dev; + struct mqd_manager *mqd; + struct queue *queue; + uint32_t pending_wptr; + unsigned int nop_packet; + + struct kfd_mem_obj *rptr_mem; + uint32_t *rptr_kernel; + uint64_t rptr_gpu_addr; + struct kfd_mem_obj *wptr_mem; + uint32_t *wptr_kernel; + uint64_t wptr_gpu_addr; + struct kfd_mem_obj *pq; + uint64_t pq_gpu_addr; + uint32_t *pq_kernel_addr; + + struct kfd_mem_obj *fence_mem_obj; + uint64_t fence_gpu_addr; + void *fence_kernel_address; + + struct list_head list; +}; + +#endif /* KFD_KERNEL_QUEUE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c new file mode 100644 index 0000000000000000000000000000000000000000..95d5af138e6e7f2bcbd8d76351f7e12827031189 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -0,0 +1,159 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include "kfd_priv.h" + +#define KFD_DRIVER_AUTHOR "AMD Inc. and others" + +#define KFD_DRIVER_DESC "Standalone HSA driver for AMD's GPUs" +#define KFD_DRIVER_DATE "20141113" +#define KFD_DRIVER_MAJOR 0 +#define KFD_DRIVER_MINOR 7 +#define KFD_DRIVER_PATCHLEVEL 0 + +const struct kfd2kgd_calls *kfd2kgd; +static const struct kgd2kfd_calls kgd2kfd = { + .exit = kgd2kfd_exit, + .probe = kgd2kfd_probe, + .device_init = kgd2kfd_device_init, + .device_exit = kgd2kfd_device_exit, + .interrupt = kgd2kfd_interrupt, + .suspend = kgd2kfd_suspend, + .resume = kgd2kfd_resume, +}; + +int sched_policy = KFD_SCHED_POLICY_HWS; +module_param(sched_policy, int, 0444); +MODULE_PARM_DESC(sched_policy, + "Kernel cmdline parameter that defines the amdkfd scheduling policy"); + +int max_num_of_processes = KFD_MAX_NUM_OF_PROCESSES_DEFAULT; +module_param(max_num_of_processes, int, 0444); +MODULE_PARM_DESC(max_num_of_processes, + "Kernel cmdline parameter that defines the amdkfd maximum number of supported processes"); + +int max_num_of_queues_per_process = KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT; +module_param(max_num_of_queues_per_process, int, 0444); +MODULE_PARM_DESC(max_num_of_queues_per_process, + "Kernel cmdline parameter that defines the amdkfd maximum number of supported queues per process"); + +bool kgd2kfd_init(unsigned interface_version, + const struct kfd2kgd_calls *f2g, + const struct kgd2kfd_calls **g2f) +{ + /* + * Only one interface version is supported, + * no kfd/kgd version skew allowed. + */ + if (interface_version != KFD_INTERFACE_VERSION) + return false; + + /* Protection against multiple amd kgd loads */ + if (kfd2kgd) + return true; + + kfd2kgd = f2g; + *g2f = &kgd2kfd; + + return true; +} +EXPORT_SYMBOL(kgd2kfd_init); + +void kgd2kfd_exit(void) +{ +} + +static int __init kfd_module_init(void) +{ + int err; + + kfd2kgd = NULL; + + /* Verify module parameters */ + if ((sched_policy < KFD_SCHED_POLICY_HWS) || + (sched_policy > KFD_SCHED_POLICY_NO_HWS)) { + pr_err("kfd: sched_policy has invalid value\n"); + return -1; + } + + /* Verify module parameters */ + if ((max_num_of_processes < 0) || + (max_num_of_processes > KFD_MAX_NUM_OF_PROCESSES)) { + pr_err("kfd: max_num_of_processes must be between 0 to KFD_MAX_NUM_OF_PROCESSES\n"); + return -1; + } + + if ((max_num_of_queues_per_process < 0) || + (max_num_of_queues_per_process > + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)) { + pr_err("kfd: max_num_of_queues_per_process must be between 0 to KFD_MAX_NUM_OF_QUEUES_PER_PROCESS\n"); + return -1; + } + + err = kfd_pasid_init(); + if (err < 0) + goto err_pasid; + + err = kfd_chardev_init(); + if (err < 0) + goto err_ioctl; + + err = kfd_topology_init(); + if (err < 0) + goto err_topology; + + kfd_process_create_wq(); + + dev_info(kfd_device, "Initialized module\n"); + + return 0; + +err_topology: + kfd_chardev_exit(); +err_ioctl: + kfd_pasid_exit(); +err_pasid: + return err; +} + +static void __exit kfd_module_exit(void) +{ + kfd_process_destroy_wq(); + kfd_topology_shutdown(); + kfd_chardev_exit(); + kfd_pasid_exit(); + dev_info(kfd_device, "Removed module\n"); +} + +module_init(kfd_module_init); +module_exit(kfd_module_exit); + +MODULE_AUTHOR(KFD_DRIVER_AUTHOR); +MODULE_DESCRIPTION(KFD_DRIVER_DESC); +MODULE_LICENSE("GPL and additional rights"); +MODULE_VERSION(__stringify(KFD_DRIVER_MAJOR) "." + __stringify(KFD_DRIVER_MINOR) "." + __stringify(KFD_DRIVER_PATCHLEVEL)); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c new file mode 100644 index 0000000000000000000000000000000000000000..adc31474e786195bb3308150334c5a7a280f1d81 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -0,0 +1,346 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include "kfd_priv.h" +#include "kfd_mqd_manager.h" +#include "cik_regs.h" +#include "../../radeon/cik_reg.h" + +inline void busy_wait(unsigned long ms) +{ + while (time_before(jiffies, ms)) + cpu_relax(); +} + +static inline struct cik_mqd *get_mqd(void *mqd) +{ + return (struct cik_mqd *)mqd; +} + +static int init_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + uint64_t addr; + struct cik_mqd *m; + int retval; + + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + + retval = kfd2kgd->allocate_mem(mm->dev->kgd, + sizeof(struct cik_mqd), + 256, + KFD_MEMPOOL_SYSTEM_WRITECOMBINE, + (struct kgd_mem **) mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; + addr = (*mqd_mem_obj)->gpu_addr; + + memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + /* + * Make sure to use the last queue state saved on mqd when the cp + * reassigns the queue, so when queue is switched on/off (e.g over + * subscription or quantum timeout) the context will be consistent + */ + m->cp_hqd_persistent_state = + DEFAULT_CP_HQD_PERSISTENT_STATE | PRELOAD_REQ; + + m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN; + /* Although WinKFD writes this, I suspect it should not be necessary */ + m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE; + + m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | + QUANTUM_DURATION(10); + + /* + * Pipe Priority + * Identifies the pipe relative priority when this queue is connected + * to the pipeline. The pipe priority is against the GFX pipe and HP3D. + * In KFD we are using a fixed pipe priority set to CS_MEDIUM. + * 0 = CS_LOW (typically below GFX) + * 1 = CS_MEDIUM (typically between HP3D and GFX + * 2 = CS_HIGH (typically above HP3D) + */ + m->cp_hqd_pipe_priority = 1; + m->cp_hqd_queue_priority = 15; + + *mqd = m; + if (gart_addr != NULL) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static void uninit_mqd(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + BUG_ON(!mm || !mqd); + kfd2kgd->free_mem(mm->dev->kgd, (struct kgd_mem *) mqd_mem_obj); +} + +static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr) +{ + return kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, wptr); + +} + +static int update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct cik_mqd *m; + + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + + m = get_mqd(mqd); + m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | + DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN; + + /* + * Calculating queue size which is log base 2 of actual queue size -1 + * dwords and another -1 for ffs + */ + m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) + - 1 - 1; + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_doorbell_control = DOORBELL_EN | + DOORBELL_OFFSET(q->doorbell_off); + + m->cp_hqd_vmid = q->vmid; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_iq_rptr = AQL_ENABLE; + m->cp_hqd_pq_control |= NO_UPDATE_RPTR; + } + + m->cp_hqd_active = 0; + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { + m->cp_hqd_active = 1; + q->is_active = true; + } + + return 0; +} + +static int destroy_mqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout, + pipe_id, queue_id); +} + +static bool is_occupied(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + + return kfd2kgd->hqd_is_occupies(mm->dev->kgd, queue_address, + pipe_id, queue_id); + +} + +/* + * HIQ MQD Implementation, concrete implementation for HIQ MQD implementation. + * The HIQ queue in Kaveri is using the same MQD structure as all the user mode + * queues but with different initial values. + */ + +static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + uint64_t addr; + struct cik_mqd *m; + int retval; + + BUG_ON(!mm || !q || !mqd || !mqd_mem_obj); + + pr_debug("kfd: In func %s\n", __func__); + + retval = kfd2kgd->allocate_mem(mm->dev->kgd, + sizeof(struct cik_mqd), + 256, + KFD_MEMPOOL_SYSTEM_WRITECOMBINE, + (struct kgd_mem **) mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; + addr = (*mqd_mem_obj)->gpu_addr; + + memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE | + PRELOAD_REQ; + m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | + QUANTUM_DURATION(10); + + m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; + + /* + * Pipe Priority + * Identifies the pipe relative priority when this queue is connected + * to the pipeline. The pipe priority is against the GFX pipe and HP3D. + * In KFD we are using a fixed pipe priority set to CS_MEDIUM. + * 0 = CS_LOW (typically below GFX) + * 1 = CS_MEDIUM (typically between HP3D and GFX + * 2 = CS_HIGH (typically above HP3D) + */ + m->cp_hqd_pipe_priority = 1; + m->cp_hqd_queue_priority = 15; + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct cik_mqd *m; + + BUG_ON(!mm || !q || !mqd); + + pr_debug("kfd: In func %s\n", __func__); + + m = get_mqd(mqd); + m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE | + DEFAULT_MIN_AVAIL_SIZE | + PRIV_STATE | + KMD_QUEUE; + + /* + * Calculating queue size which is log base 2 of actual queue + * size -1 dwords + */ + m->cp_hqd_pq_control |= ffs(q->queue_size / sizeof(unsigned int)) + - 1 - 1; + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_doorbell_control = DOORBELL_EN | + DOORBELL_OFFSET(q->doorbell_off); + + m->cp_hqd_vmid = q->vmid; + + m->cp_hqd_active = 0; + q->is_active = false; + if (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0) { + m->cp_hqd_active = 1; + q->is_active = true; + } + + return 0; +} + +struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + struct mqd_manager *mqd; + + BUG_ON(!dev); + BUG_ON(type >= KFD_MQD_TYPE_MAX); + + pr_debug("kfd: In func %s\n", __func__); + + mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL); + if (!mqd) + return NULL; + + mqd->dev = dev; + + switch (type) { + case KFD_MQD_TYPE_CIK_CP: + case KFD_MQD_TYPE_CIK_COMPUTE: + mqd->init_mqd = init_mqd; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + break; + case KFD_MQD_TYPE_CIK_HIQ: + mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + break; + default: + kfree(mqd); + return NULL; + } + + return mqd; +} + +/* SDMA queues should be implemented here when the cp will supports them */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..213a71e0b6c784d2694d57c978e3bfb92cde8b04 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -0,0 +1,91 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef KFD_MQD_MANAGER_H_ +#define KFD_MQD_MANAGER_H_ + +#include "kfd_priv.h" + +/** + * struct mqd_manager + * + * @init_mqd: Allocates the mqd buffer on local gpu memory and initialize it. + * + * @load_mqd: Loads the mqd to a concrete hqd slot. Used only for no cp + * scheduling mode. + * + * @update_mqd: Handles a update call for the MQD + * + * @destroy_mqd: Destroys the HQD slot and by that preempt the relevant queue. + * Used only for no cp scheduling. + * + * @uninit_mqd: Releases the mqd buffer from local gpu memory. + * + * @is_occupied: Checks if the relevant HQD slot is occupied. + * + * @mqd_mutex: Mqd manager mutex. + * + * @dev: The kfd device structure coupled with this module. + * + * MQD stands for Memory Queue Descriptor which represents the current queue + * state in the memory and initiate the HQD (Hardware Queue Descriptor) state. + * This structure is actually a base class for the different types of MQDs + * structures for the variant ASICs that should be supported in the future. + * This base class is also contains all the MQD specific operations. + * Another important thing to mention is that each queue has a MQD that keeps + * his state (or context) after each preemption or reassignment. + * Basically there are a instances of the mqd manager class per MQD type per + * ASIC. Currently the kfd driver supports only Kaveri so there are instances + * per KFD_MQD_TYPE for each device. + * + */ + +struct mqd_manager { + int (*init_mqd)(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q); + + int (*load_mqd)(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr); + + int (*update_mqd)(struct mqd_manager *mm, void *mqd, + struct queue_properties *q); + + int (*destroy_mqd)(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id); + + void (*uninit_mqd)(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj); + + bool (*is_occupied)(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id); + + struct mutex mqd_mutex; + struct kfd_dev *dev; +}; + +#endif /* KFD_MQD_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c new file mode 100644 index 0000000000000000000000000000000000000000..5ce9233d2004e064f1a734d90a2ca33f3bd7880b --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -0,0 +1,565 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include "kfd_device_queue_manager.h" +#include "kfd_kernel_queue.h" +#include "kfd_priv.h" +#include "kfd_pm4_headers.h" +#include "kfd_pm4_opcodes.h" + +static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, + unsigned int buffer_size_bytes) +{ + unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t); + + BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes); + *wptr = temp; +} + +static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size) +{ + union PM4_MES_TYPE_3_HEADER header; + + header.u32all = 0; + header.opcode = opcode; + header.count = packet_size/sizeof(uint32_t) - 2; + header.type = PM4_TYPE_3; + + return header.u32all; +} + +static void pm_calc_rlib_size(struct packet_manager *pm, + unsigned int *rlib_size, + bool *over_subscription) +{ + unsigned int process_count, queue_count; + + BUG_ON(!pm || !rlib_size || !over_subscription); + + process_count = pm->dqm->processes_count; + queue_count = pm->dqm->queue_count; + + /* check if there is over subscription*/ + *over_subscription = false; + if ((process_count > 1) || + queue_count > PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) { + *over_subscription = true; + pr_debug("kfd: over subscribed runlist\n"); + } + + /* calculate run list ib allocation size */ + *rlib_size = process_count * sizeof(struct pm4_map_process) + + queue_count * sizeof(struct pm4_map_queues); + + /* + * Increase the allocation size in case we need a chained run list + * when over subscription + */ + if (*over_subscription) + *rlib_size += sizeof(struct pm4_runlist); + + pr_debug("kfd: runlist ib size %d\n", *rlib_size); +} + +static int pm_allocate_runlist_ib(struct packet_manager *pm, + unsigned int **rl_buffer, + uint64_t *rl_gpu_buffer, + unsigned int *rl_buffer_size, + bool *is_over_subscription) +{ + int retval; + + BUG_ON(!pm); + BUG_ON(pm->allocated == true); + BUG_ON(is_over_subscription == NULL); + + pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); + + retval = kfd2kgd->allocate_mem(pm->dqm->dev->kgd, + *rl_buffer_size, + PAGE_SIZE, + KFD_MEMPOOL_SYSTEM_WRITECOMBINE, + (struct kgd_mem **) &pm->ib_buffer_obj); + + if (retval != 0) { + pr_err("kfd: failed to allocate runlist IB\n"); + return retval; + } + + *(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr; + *rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr; + + memset(*rl_buffer, 0, *rl_buffer_size); + pm->allocated = true; + return retval; +} + +static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) +{ + struct pm4_runlist *packet; + + BUG_ON(!pm || !buffer || !ib); + + packet = (struct pm4_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_runlist)); + packet->header.u32all = build_pm4_header(IT_RUN_LIST, + sizeof(struct pm4_runlist)); + + packet->bitfields4.ib_size = ib_size_in_dwords; + packet->bitfields4.chain = chain ? 1 : 0; + packet->bitfields4.offload_polling = 0; + packet->bitfields4.valid = 1; + packet->ordinal2 = lower_32_bits(ib); + packet->bitfields3.ib_base_hi = upper_32_bits(ib); + + return 0; +} + +static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd) +{ + struct pm4_map_process *packet; + struct queue *cur; + uint32_t num_queues; + + BUG_ON(!pm || !buffer || !qpd); + + packet = (struct pm4_map_process *)buffer; + + pr_debug("kfd: In func %s\n", __func__); + + memset(buffer, 0, sizeof(struct pm4_map_process)); + + packet->header.u32all = build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_map_process)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; + packet->bitfields2.process_quantum = 1; + packet->bitfields2.pasid = qpd->pqm->process->pasid; + packet->bitfields3.page_table_base = qpd->page_table_base; + packet->bitfields10.gds_size = qpd->gds_size; + packet->bitfields10.num_gws = qpd->num_gws; + packet->bitfields10.num_oac = qpd->num_oac; + num_queues = 0; + list_for_each_entry(cur, &qpd->queues_list, list) + num_queues++; + packet->bitfields10.num_queues = num_queues; + + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; + packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; + + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); + packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); + + return 0; +} + +static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, + struct queue *q) +{ + struct pm4_map_queues *packet; + + BUG_ON(!pm || !buffer || !q); + + pr_debug("kfd: In func %s\n", __func__); + + packet = (struct pm4_map_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_map_queues)); + + packet->header.u32all = build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe; + packet->bitfields2.num_queues = 1; + packet->bitfields2.queue_sel = + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots; + + packet->bitfields2.vidmem = (q->properties.is_interop) ? + vidmem__mes_map_queues__uses_video_memory : + vidmem__mes_map_queues__uses_no_video_memory; + + switch (q->properties.type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__sdma0; + break; + default: + BUG(); + break; + } + + packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset = + q->properties.doorbell_off; + + packet->mes_map_queues_ordinals[0].mqd_addr_lo = + lower_32_bits(q->gart_mqd_addr); + + packet->mes_map_queues_ordinals[0].mqd_addr_hi = + upper_32_bits(q->gart_mqd_addr); + + packet->mes_map_queues_ordinals[0].wptr_addr_lo = + lower_32_bits((uint64_t)q->properties.write_ptr); + + packet->mes_map_queues_ordinals[0].wptr_addr_hi = + upper_32_bits((uint64_t)q->properties.write_ptr); + + return 0; +} + +static int pm_create_runlist_ib(struct packet_manager *pm, + struct list_head *queues, + uint64_t *rl_gpu_addr, + size_t *rl_size_bytes) +{ + unsigned int alloc_size_bytes; + unsigned int *rl_buffer, rl_wptr, i; + int retval, proccesses_mapped; + struct device_process_node *cur; + struct qcm_process_device *qpd; + struct queue *q; + struct kernel_queue *kq; + bool is_over_subscription; + + BUG_ON(!pm || !queues || !rl_size_bytes || !rl_gpu_addr); + + rl_wptr = retval = proccesses_mapped = 0; + + retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr, + &alloc_size_bytes, &is_over_subscription); + if (retval != 0) + return retval; + + *rl_size_bytes = alloc_size_bytes; + + pr_debug("kfd: In func %s\n", __func__); + pr_debug("kfd: building runlist ib process count: %d queues count %d\n", + pm->dqm->processes_count, pm->dqm->queue_count); + + /* build the run list ib packet */ + list_for_each_entry(cur, queues, list) { + qpd = cur->qpd; + /* build map process packet */ + if (proccesses_mapped >= pm->dqm->processes_count) { + pr_debug("kfd: not enough space left in runlist IB\n"); + pm_release_ib(pm); + return -ENOMEM; + } + retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); + if (retval != 0) + return retval; + proccesses_mapped++; + inc_wptr(&rl_wptr, sizeof(struct pm4_map_process), + alloc_size_bytes); + + list_for_each_entry(kq, &qpd->priv_queue_list, list) { + if (kq->queue->properties.is_active != true) + continue; + retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr], + kq->queue); + if (retval != 0) + return retval; + inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), + alloc_size_bytes); + } + + list_for_each_entry(q, &qpd->queues_list, list) { + if (q->properties.is_active != true) + continue; + retval = pm_create_map_queue(pm, + &rl_buffer[rl_wptr], q); + if (retval != 0) + return retval; + inc_wptr(&rl_wptr, sizeof(struct pm4_map_queues), + alloc_size_bytes); + } + } + + pr_debug("kfd: finished map process and queues to runlist\n"); + + if (is_over_subscription) + pm_create_runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, + alloc_size_bytes / sizeof(uint32_t), true); + + for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++) + pr_debug("0x%2X ", rl_buffer[i]); + pr_debug("\n"); + + return 0; +} + +int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) +{ + BUG_ON(!dqm); + + pm->dqm = dqm; + mutex_init(&pm->lock); + pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); + if (pm->priv_queue == NULL) { + mutex_destroy(&pm->lock); + return -ENOMEM; + } + pm->allocated = false; + + return 0; +} + +void pm_uninit(struct packet_manager *pm) +{ + BUG_ON(!pm); + + mutex_destroy(&pm->lock); + kernel_queue_uninit(pm->priv_queue); +} + +int pm_send_set_resources(struct packet_manager *pm, + struct scheduling_resources *res) +{ + struct pm4_set_resources *packet; + + BUG_ON(!pm || !res); + + pr_debug("kfd: In func %s\n", __func__); + + mutex_lock(&pm->lock); + pm->priv_queue->acquire_packet_buffer(pm->priv_queue, + sizeof(*packet) / sizeof(uint32_t), + (unsigned int **)&packet); + if (packet == NULL) { + mutex_unlock(&pm->lock); + pr_err("kfd: failed to allocate buffer on kernel queue\n"); + return -ENOMEM; + } + + memset(packet, 0, sizeof(struct pm4_set_resources)); + packet->header.u32all = build_pm4_header(IT_SET_RESOURCES, + sizeof(struct pm4_set_resources)); + + packet->bitfields2.queue_type = + queue_type__mes_set_resources__hsa_interface_queue_hiq; + packet->bitfields2.vmid_mask = res->vmid_mask; + packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY; + packet->bitfields7.oac_mask = res->oac_mask; + packet->bitfields8.gds_heap_base = res->gds_heap_base; + packet->bitfields8.gds_heap_size = res->gds_heap_size; + + packet->gws_mask_lo = lower_32_bits(res->gws_mask); + packet->gws_mask_hi = upper_32_bits(res->gws_mask); + + packet->queue_mask_lo = lower_32_bits(res->queue_mask); + packet->queue_mask_hi = upper_32_bits(res->queue_mask); + + pm->priv_queue->submit_packet(pm->priv_queue); + pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + + mutex_unlock(&pm->lock); + + return 0; +} + +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) +{ + uint64_t rl_gpu_ib_addr; + uint32_t *rl_buffer; + size_t rl_ib_size, packet_size_dwords; + int retval; + + BUG_ON(!pm || !dqm_queues); + + retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr, + &rl_ib_size); + if (retval != 0) + goto fail_create_runlist_ib; + + pr_debug("kfd: runlist IB address: 0x%llX\n", rl_gpu_ib_addr); + + packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t); + mutex_lock(&pm->lock); + + retval = pm->priv_queue->acquire_packet_buffer(pm->priv_queue, + packet_size_dwords, &rl_buffer); + if (retval != 0) + goto fail_acquire_packet_buffer; + + retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr, + rl_ib_size / sizeof(uint32_t), false); + if (retval != 0) + goto fail_create_runlist; + + pm->priv_queue->submit_packet(pm->priv_queue); + pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + + mutex_unlock(&pm->lock); + + return retval; + +fail_create_runlist: + pm->priv_queue->rollback_packet(pm->priv_queue); +fail_acquire_packet_buffer: + mutex_unlock(&pm->lock); +fail_create_runlist_ib: + if (pm->allocated == true) + pm_release_ib(pm); + return retval; +} + +int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, + uint32_t fence_value) +{ + int retval; + struct pm4_query_status *packet; + + BUG_ON(!pm || !fence_address); + + mutex_lock(&pm->lock); + retval = pm->priv_queue->acquire_packet_buffer( + pm->priv_queue, + sizeof(struct pm4_query_status) / sizeof(uint32_t), + (unsigned int **)&packet); + if (retval != 0) + goto fail_acquire_packet_buffer; + + packet->header.u32all = build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_query_status)); + + packet->bitfields2.context_id = 0; + packet->bitfields2.interrupt_sel = + interrupt_sel__mes_query_status__completion_status; + packet->bitfields2.command = + command__mes_query_status__fence_only_after_write_ack; + + packet->addr_hi = upper_32_bits((uint64_t)fence_address); + packet->addr_lo = lower_32_bits((uint64_t)fence_address); + packet->data_hi = upper_32_bits((uint64_t)fence_value); + packet->data_lo = lower_32_bits((uint64_t)fence_value); + + pm->priv_queue->submit_packet(pm->priv_queue); + pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + mutex_unlock(&pm->lock); + + return 0; + +fail_acquire_packet_buffer: + mutex_unlock(&pm->lock); + return retval; +} + +int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, + enum kfd_preempt_type_filter mode, + uint32_t filter_param, bool reset, + unsigned int sdma_engine) +{ + int retval; + uint32_t *buffer; + struct pm4_unmap_queues *packet; + + BUG_ON(!pm); + + mutex_lock(&pm->lock); + retval = pm->priv_queue->acquire_packet_buffer( + pm->priv_queue, + sizeof(struct pm4_unmap_queues) / sizeof(uint32_t), + &buffer); + if (retval != 0) + goto err_acquire_packet_buffer; + + packet = (struct pm4_unmap_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_unmap_queues)); + + packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES, + sizeof(struct pm4_unmap_queues)); + switch (type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; + default: + BUG(); + break; + } + + if (reset) + packet->bitfields2.action = + action__mes_unmap_queues__reset_queues; + else + packet->bitfields2.action = + action__mes_unmap_queues__preempt_queues; + + switch (mode) { + case KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_specified_queues; + packet->bitfields2.num_queues = 1; + packet->bitfields3b.doorbell_offset0 = filter_param; + break; + case KFD_PREEMPT_TYPE_FILTER_BY_PASID: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; + packet->bitfields3a.pasid = filter_param; + break; + case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_all_active_queues; + break; + default: + BUG(); + break; + }; + + pm->priv_queue->submit_packet(pm->priv_queue); + pm->priv_queue->sync_with_hw(pm->priv_queue, KFD_HIQ_TIMEOUT); + + mutex_unlock(&pm->lock); + return 0; + +err_acquire_packet_buffer: + mutex_unlock(&pm->lock); + return retval; +} + +void pm_release_ib(struct packet_manager *pm) +{ + BUG_ON(!pm); + + mutex_lock(&pm->lock); + if (pm->allocated) { + kfd2kgd->free_mem(pm->dqm->dev->kgd, + (struct kgd_mem *) pm->ib_buffer_obj); + pm->allocated = false; + } + mutex_unlock(&pm->lock); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c new file mode 100644 index 0000000000000000000000000000000000000000..71699ad97d74487d532cef2a168f8bb4d4fb5366 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c @@ -0,0 +1,96 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include "kfd_priv.h" + +static unsigned long *pasid_bitmap; +static unsigned int pasid_limit; +static DEFINE_MUTEX(pasid_mutex); + +int kfd_pasid_init(void) +{ + pasid_limit = max_num_of_processes; + + pasid_bitmap = kzalloc(BITS_TO_LONGS(pasid_limit), GFP_KERNEL); + if (!pasid_bitmap) + return -ENOMEM; + + set_bit(0, pasid_bitmap); /* PASID 0 is reserved. */ + + return 0; +} + +void kfd_pasid_exit(void) +{ + kfree(pasid_bitmap); +} + +bool kfd_set_pasid_limit(unsigned int new_limit) +{ + if (new_limit < pasid_limit) { + bool ok; + + mutex_lock(&pasid_mutex); + + /* ensure that no pasids >= new_limit are in-use */ + ok = (find_next_bit(pasid_bitmap, pasid_limit, new_limit) == + pasid_limit); + if (ok) + pasid_limit = new_limit; + + mutex_unlock(&pasid_mutex); + + return ok; + } + + return true; +} + +inline unsigned int kfd_get_pasid_limit(void) +{ + return pasid_limit; +} + +unsigned int kfd_pasid_alloc(void) +{ + unsigned int found; + + mutex_lock(&pasid_mutex); + + found = find_first_zero_bit(pasid_bitmap, pasid_limit); + if (found == pasid_limit) + found = 0; + else + set_bit(found, pasid_bitmap); + + mutex_unlock(&pasid_mutex); + + return found; +} + +void kfd_pasid_free(unsigned int pasid) +{ + BUG_ON(pasid == 0 || pasid >= pasid_limit); + clear_bit(pasid, pasid_bitmap); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h new file mode 100644 index 0000000000000000000000000000000000000000..071ad5724bd2cd12d42963fe23a7a9ad7cda24c3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h @@ -0,0 +1,405 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef KFD_PM4_HEADERS_H_ +#define KFD_PM4_HEADERS_H_ + +#ifndef PM4_MES_HEADER_DEFINED +#define PM4_MES_HEADER_DEFINED +union PM4_MES_TYPE_3_HEADER { + struct { + uint32_t reserved1:8; /* < reserved */ + uint32_t opcode:8; /* < IT opcode */ + uint32_t count:14; /* < number of DWORDs - 1 + * in the information body. + */ + uint32_t type:2; /* < packet identifier. + * It should be 3 for type 3 packets + */ + }; + uint32_t u32all; +}; +#endif /* PM4_MES_HEADER_DEFINED */ + +/* --------------------MES_SET_RESOURCES-------------------- */ + +#ifndef PM4_MES_SET_RESOURCES_DEFINED +#define PM4_MES_SET_RESOURCES_DEFINED +enum set_resources_queue_type_enum { + queue_type__mes_set_resources__kernel_interface_queue_kiq = 0, + queue_type__mes_set_resources__hsa_interface_queue_hiq = 1, + queue_type__mes_set_resources__hsa_debug_interface_queue = 4 +}; + +struct pm4_set_resources { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t vmid_mask:16; + uint32_t unmap_latency:8; + uint32_t reserved1:5; + enum set_resources_queue_type_enum queue_type:3; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t queue_mask_lo; + uint32_t queue_mask_hi; + uint32_t gws_mask_lo; + uint32_t gws_mask_hi; + + union { + struct { + uint32_t oac_mask:16; + uint32_t reserved2:16; + } bitfields7; + uint32_t ordinal7; + }; + + union { + struct { + uint32_t gds_heap_base:6; + uint32_t reserved3:5; + uint32_t gds_heap_size:6; + uint32_t reserved4:15; + } bitfields8; + uint32_t ordinal8; + }; + +}; +#endif + +/*--------------------MES_RUN_LIST-------------------- */ + +#ifndef PM4_MES_RUN_LIST_DEFINED +#define PM4_MES_RUN_LIST_DEFINED + +struct pm4_runlist { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:2; + uint32_t ib_base_lo:30; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t ib_base_hi:16; + uint32_t reserved2:16; + } bitfields3; + uint32_t ordinal3; + }; + + union { + struct { + uint32_t ib_size:20; + uint32_t chain:1; + uint32_t offload_polling:1; + uint32_t reserved3:1; + uint32_t valid:1; + uint32_t reserved4:8; + } bitfields4; + uint32_t ordinal4; + }; + +}; +#endif + +/*--------------------MES_MAP_PROCESS-------------------- */ + +#ifndef PM4_MES_MAP_PROCESS_DEFINED +#define PM4_MES_MAP_PROCESS_DEFINED + +struct pm4_map_process { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:8; + uint32_t diq_enable:1; + uint32_t process_quantum:7; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t page_table_base:28; + uint32_t reserved3:4; + } bitfields3; + uint32_t ordinal3; + }; + + uint32_t sh_mem_bases; + uint32_t sh_mem_ape1_base; + uint32_t sh_mem_ape1_limit; + uint32_t sh_mem_config; + uint32_t gds_addr_lo; + uint32_t gds_addr_hi; + + union { + struct { + uint32_t num_gws:6; + uint32_t reserved4:2; + uint32_t num_oac:4; + uint32_t reserved5:4; + uint32_t gds_size:6; + uint32_t num_queues:10; + } bitfields10; + uint32_t ordinal10; + }; + +}; +#endif + +/*--------------------MES_MAP_QUEUES--------------------*/ + +#ifndef PM4_MES_MAP_QUEUES_DEFINED +#define PM4_MES_MAP_QUEUES_DEFINED +enum map_queues_queue_sel_enum { + queue_sel__mes_map_queues__map_to_specified_queue_slots = 0, + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots = 1, + queue_sel__mes_map_queues__enable_process_queues = 2 +}; + +enum map_queues_vidmem_enum { + vidmem__mes_map_queues__uses_no_video_memory = 0, + vidmem__mes_map_queues__uses_video_memory = 1 +}; + +enum map_queues_alloc_format_enum { + alloc_format__mes_map_queues__one_per_pipe = 0, + alloc_format__mes_map_queues__all_on_one_pipe = 1 +}; + +enum map_queues_engine_sel_enum { + engine_sel__mes_map_queues__compute = 0, + engine_sel__mes_map_queues__sdma0 = 2, + engine_sel__mes_map_queues__sdma1 = 3 +}; + +struct pm4_map_queues { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:4; + enum map_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:2; + uint32_t vmid:4; + uint32_t reserved3:4; + enum map_queues_vidmem_enum vidmem:2; + uint32_t reserved4:6; + enum map_queues_alloc_format_enum alloc_format:2; + enum map_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + struct { + union { + struct { + uint32_t reserved5:2; + uint32_t doorbell_offset:21; + uint32_t reserved6:3; + uint32_t queue:6; + } bitfields3; + uint32_t ordinal3; + }; + + uint32_t mqd_addr_lo; + uint32_t mqd_addr_hi; + uint32_t wptr_addr_lo; + uint32_t wptr_addr_hi; + + } mes_map_queues_ordinals[1]; /* 1..N of these ordinal groups */ + +}; +#endif + +/*--------------------MES_QUERY_STATUS--------------------*/ + +#ifndef PM4_MES_QUERY_STATUS_DEFINED +#define PM4_MES_QUERY_STATUS_DEFINED +enum query_status_interrupt_sel_enum { + interrupt_sel__mes_query_status__completion_status = 0, + interrupt_sel__mes_query_status__process_status = 1, + interrupt_sel__mes_query_status__queue_status = 2 +}; + +enum query_status_command_enum { + command__mes_query_status__interrupt_only = 0, + command__mes_query_status__fence_only_immediate = 1, + command__mes_query_status__fence_only_after_write_ack = 2, + command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3 +}; + +enum query_status_engine_sel_enum { + engine_sel__mes_query_status__compute = 0, + engine_sel__mes_query_status__sdma0_queue = 2, + engine_sel__mes_query_status__sdma1_queue = 3 +}; + +struct pm4_query_status { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t context_id:28; + enum query_status_interrupt_sel_enum interrupt_sel:2; + enum query_status_command_enum command:2; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:16; + } bitfields3a; + struct { + uint32_t reserved2:2; + uint32_t doorbell_offset:21; + uint32_t reserved3:3; + enum query_status_engine_sel_enum engine_sel:3; + uint32_t reserved4:3; + } bitfields3b; + uint32_t ordinal3; + }; + + uint32_t addr_lo; + uint32_t addr_hi; + uint32_t data_lo; + uint32_t data_hi; +}; +#endif + +/*--------------------MES_UNMAP_QUEUES--------------------*/ + +#ifndef PM4_MES_UNMAP_QUEUES_DEFINED +#define PM4_MES_UNMAP_QUEUES_DEFINED +enum unmap_queues_action_enum { + action__mes_unmap_queues__preempt_queues = 0, + action__mes_unmap_queues__reset_queues = 1, + action__mes_unmap_queues__disable_process_queues = 2 +}; + +enum unmap_queues_queue_sel_enum { + queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, + queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2 +}; + +enum unmap_queues_engine_sel_enum { + engine_sel__mes_unmap_queues__compute = 0, + engine_sel__mes_unmap_queues__sdma0 = 2, + engine_sel__mes_unmap_queues__sdma1 = 3 +}; + +struct pm4_unmap_queues { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + enum unmap_queues_action_enum action:2; + uint32_t reserved1:2; + enum unmap_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:20; + enum unmap_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved3:16; + } bitfields3a; + struct { + uint32_t reserved4:2; + uint32_t doorbell_offset0:21; + uint32_t reserved5:9; + } bitfields3b; + uint32_t ordinal3; + }; + + union { + struct { + uint32_t reserved6:2; + uint32_t doorbell_offset1:21; + uint32_t reserved7:9; + } bitfields4; + uint32_t ordinal4; + }; + + union { + struct { + uint32_t reserved8:2; + uint32_t doorbell_offset2:21; + uint32_t reserved9:9; + } bitfields5; + uint32_t ordinal5; + }; + + union { + struct { + uint32_t reserved10:2; + uint32_t doorbell_offset3:21; + uint32_t reserved11:9; + } bitfields6; + uint32_t ordinal6; + }; + +}; +#endif + +enum { + CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 +}; + +#endif /* KFD_PM4_HEADERS_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h new file mode 100644 index 0000000000000000000000000000000000000000..b72fa3b8c2d48dc08b6fcf250a2dc15f8f0ef607 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h @@ -0,0 +1,107 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + + +#ifndef KFD_PM4_OPCODES_H +#define KFD_PM4_OPCODES_H + +enum it_opcode_type { + IT_NOP = 0x10, + IT_SET_BASE = 0x11, + IT_CLEAR_STATE = 0x12, + IT_INDEX_BUFFER_SIZE = 0x13, + IT_DISPATCH_DIRECT = 0x15, + IT_DISPATCH_INDIRECT = 0x16, + IT_ATOMIC_GDS = 0x1D, + IT_OCCLUSION_QUERY = 0x1F, + IT_SET_PREDICATION = 0x20, + IT_REG_RMW = 0x21, + IT_COND_EXEC = 0x22, + IT_PRED_EXEC = 0x23, + IT_DRAW_INDIRECT = 0x24, + IT_DRAW_INDEX_INDIRECT = 0x25, + IT_INDEX_BASE = 0x26, + IT_DRAW_INDEX_2 = 0x27, + IT_CONTEXT_CONTROL = 0x28, + IT_INDEX_TYPE = 0x2A, + IT_DRAW_INDIRECT_MULTI = 0x2C, + IT_DRAW_INDEX_AUTO = 0x2D, + IT_NUM_INSTANCES = 0x2F, + IT_DRAW_INDEX_MULTI_AUTO = 0x30, + IT_INDIRECT_BUFFER_CNST = 0x33, + IT_STRMOUT_BUFFER_UPDATE = 0x34, + IT_DRAW_INDEX_OFFSET_2 = 0x35, + IT_DRAW_PREAMBLE = 0x36, + IT_WRITE_DATA = 0x37, + IT_DRAW_INDEX_INDIRECT_MULTI = 0x38, + IT_MEM_SEMAPHORE = 0x39, + IT_COPY_DW = 0x3B, + IT_WAIT_REG_MEM = 0x3C, + IT_INDIRECT_BUFFER = 0x3F, + IT_COPY_DATA = 0x40, + IT_PFP_SYNC_ME = 0x42, + IT_SURFACE_SYNC = 0x43, + IT_COND_WRITE = 0x45, + IT_EVENT_WRITE = 0x46, + IT_EVENT_WRITE_EOP = 0x47, + IT_EVENT_WRITE_EOS = 0x48, + IT_RELEASE_MEM = 0x49, + IT_PREAMBLE_CNTL = 0x4A, + IT_DMA_DATA = 0x50, + IT_ACQUIRE_MEM = 0x58, + IT_REWIND = 0x59, + IT_LOAD_UCONFIG_REG = 0x5E, + IT_LOAD_SH_REG = 0x5F, + IT_LOAD_CONFIG_REG = 0x60, + IT_LOAD_CONTEXT_REG = 0x61, + IT_SET_CONFIG_REG = 0x68, + IT_SET_CONTEXT_REG = 0x69, + IT_SET_CONTEXT_REG_INDIRECT = 0x73, + IT_SET_SH_REG = 0x76, + IT_SET_SH_REG_OFFSET = 0x77, + IT_SET_QUEUE_REG = 0x78, + IT_SET_UCONFIG_REG = 0x79, + IT_SCRATCH_RAM_WRITE = 0x7D, + IT_SCRATCH_RAM_READ = 0x7E, + IT_LOAD_CONST_RAM = 0x80, + IT_WRITE_CONST_RAM = 0x81, + IT_DUMP_CONST_RAM = 0x83, + IT_INCREMENT_CE_COUNTER = 0x84, + IT_INCREMENT_DE_COUNTER = 0x85, + IT_WAIT_ON_CE_COUNTER = 0x86, + IT_WAIT_ON_DE_COUNTER_DIFF = 0x88, + IT_SWITCH_BUFFER = 0x8B, + IT_SET_RESOURCES = 0xA0, + IT_MAP_PROCESS = 0xA1, + IT_MAP_QUEUES = 0xA2, + IT_UNMAP_QUEUES = 0xA3, + IT_QUERY_STATUS = 0xA4, + IT_RUN_LIST = 0xA5, +}; + +#define PM4_TYPE_0 0 +#define PM4_TYPE_2 2 +#define PM4_TYPE_3 3 + +#endif /* KFD_PM4_OPCODES_H */ + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h new file mode 100644 index 0000000000000000000000000000000000000000..f9fb81e3bb09b24edd7f8de6c22cf7e64a92391b --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -0,0 +1,600 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef KFD_PRIV_H_INCLUDED +#define KFD_PRIV_H_INCLUDED + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define KFD_SYSFS_FILE_MODE 0444 + +/* + * When working with cp scheduler we should assign the HIQ manually or via + * the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot + * definitions for Kaveri. In Kaveri only the first ME queues participates + * in the cp scheduling taking that in mind we set the HIQ slot in the + * second ME. + */ +#define KFD_CIK_HIQ_PIPE 4 +#define KFD_CIK_HIQ_QUEUE 0 + +/* GPU ID hash width in bits */ +#define KFD_GPU_ID_HASH_WIDTH 16 + +/* Macro for allocating structures */ +#define kfd_alloc_struct(ptr_to_struct) \ + ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) + +/* Kernel module parameter to specify maximum number of supported processes */ +extern int max_num_of_processes; + +#define KFD_MAX_NUM_OF_PROCESSES_DEFAULT 32 +#define KFD_MAX_NUM_OF_PROCESSES 512 + +/* + * Kernel module parameter to specify maximum number of supported queues + * per process + */ +extern int max_num_of_queues_per_process; + +#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS_DEFAULT 128 +#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 + +#define KFD_KERNEL_QUEUE_SIZE 2048 + +/* Kernel module parameter to specify the scheduling policy */ +extern int sched_policy; + +/** + * enum kfd_sched_policy + * + * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp) + * scheduling. In this scheduling mode we're using the firmware code to + * schedule the user mode queues and kernel queues such as HIQ and DIQ. + * the HIQ queue is used as a special queue that dispatches the configuration + * to the cp and the user mode queues list that are currently running. + * the DIQ queue is a debugging queue that dispatches debugging commands to the + * firmware. + * in this scheduling mode user mode queues over subscription feature is + * enabled. + * + * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but the over + * subscription feature disabled. + * + * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly + * set the command processor registers and sets the queues "manually". This + * mode is used *ONLY* for debugging proposes. + * + */ +enum kfd_sched_policy { + KFD_SCHED_POLICY_HWS = 0, + KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION, + KFD_SCHED_POLICY_NO_HWS +}; + +enum cache_policy { + cache_policy_coherent, + cache_policy_noncoherent +}; + +struct kfd_device_info { + unsigned int max_pasid_bits; + size_t ih_ring_entry_size; + uint16_t mqd_size_aligned; +}; + +struct kfd_dev { + struct kgd_dev *kgd; + + const struct kfd_device_info *device_info; + struct pci_dev *pdev; + + unsigned int id; /* topology stub index */ + + phys_addr_t doorbell_base; /* Start of actual doorbells used by + * KFD. It is aligned for mapping + * into user mode + */ + size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell + * to HW doorbell, GFX reserved some + * at the start) + */ + size_t doorbell_process_limit; /* Number of processes we have doorbell + * space for. + */ + u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells + * page used by kernel queue + */ + + struct kgd2kfd_shared_resources shared_resources; + + void *interrupt_ring; + size_t interrupt_ring_size; + atomic_t interrupt_ring_rptr; + atomic_t interrupt_ring_wptr; + struct work_struct interrupt_work; + spinlock_t interrupt_lock; + + /* QCM Device instance */ + struct device_queue_manager *dqm; + + bool init_complete; + /* + * Interrupts of interest to KFD are copied + * from the HW ring into a SW ring. + */ + bool interrupts_active; +}; + +/* KGD2KFD callbacks */ +void kgd2kfd_exit(void); +struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev); +bool kgd2kfd_device_init(struct kfd_dev *kfd, + const struct kgd2kfd_shared_resources *gpu_resources); +void kgd2kfd_device_exit(struct kfd_dev *kfd); + +extern const struct kfd2kgd_calls *kfd2kgd; + +struct kfd_mem_obj { + void *bo; + uint64_t gpu_addr; + uint32_t *cpu_ptr; +}; + +enum kfd_mempool { + KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, + KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, + KFD_MEMPOOL_FRAMEBUFFER = 3, +}; + +/* Character device interface */ +int kfd_chardev_init(void); +void kfd_chardev_exit(void); +struct device *kfd_chardev(void); + +/** + * enum kfd_preempt_type_filter + * + * @KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: Preempts single queue. + * + * @KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES: Preempts all queues in the + * running queues list. + * + * @KFD_PRERMPT_TYPE_FILTER_BY_PASID: Preempts queues that belongs to + * specific process. + * + */ +enum kfd_preempt_type_filter { + KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE, + KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, + KFD_PREEMPT_TYPE_FILTER_BY_PASID +}; + +enum kfd_preempt_type { + KFD_PREEMPT_TYPE_WAVEFRONT, + KFD_PREEMPT_TYPE_WAVEFRONT_RESET +}; + +/** + * enum kfd_queue_type + * + * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type. + * + * @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type. + * + * @KFD_QUEUE_TYPE_HIQ: HIQ queue type. + * + * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. + */ +enum kfd_queue_type { + KFD_QUEUE_TYPE_COMPUTE, + KFD_QUEUE_TYPE_SDMA, + KFD_QUEUE_TYPE_HIQ, + KFD_QUEUE_TYPE_DIQ +}; + +enum kfd_queue_format { + KFD_QUEUE_FORMAT_PM4, + KFD_QUEUE_FORMAT_AQL +}; + +/** + * struct queue_properties + * + * @type: The queue type. + * + * @queue_id: Queue identifier. + * + * @queue_address: Queue ring buffer address. + * + * @queue_size: Queue ring buffer size. + * + * @priority: Defines the queue priority relative to other queues in the + * process. + * This is just an indication and HW scheduling may override the priority as + * necessary while keeping the relative prioritization. + * the priority granularity is from 0 to f which f is the highest priority. + * currently all queues are initialized with the highest priority. + * + * @queue_percent: This field is partially implemented and currently a zero in + * this field defines that the queue is non active. + * + * @read_ptr: User space address which points to the number of dwords the + * cp read from the ring buffer. This field updates automatically by the H/W. + * + * @write_ptr: Defines the number of dwords written to the ring buffer. + * + * @doorbell_ptr: This field aim is to notify the H/W of new packet written to + * the queue ring buffer. This field should be similar to write_ptr and the user + * should update this field after he updated the write_ptr. + * + * @doorbell_off: The doorbell offset in the doorbell pci-bar. + * + * @is_interop: Defines if this is a interop queue. Interop queue means that the + * queue can access both graphics and compute resources. + * + * @is_active: Defines if the queue is active or not. + * + * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid + * of the queue. + * + * This structure represents the queue properties for each queue no matter if + * it's user mode or kernel mode queue. + * + */ +struct queue_properties { + enum kfd_queue_type type; + enum kfd_queue_format format; + unsigned int queue_id; + uint64_t queue_address; + uint64_t queue_size; + uint32_t priority; + uint32_t queue_percent; + uint32_t *read_ptr; + uint32_t *write_ptr; + uint32_t __iomem *doorbell_ptr; + uint32_t doorbell_off; + bool is_interop; + bool is_active; + /* Not relevant for user mode queues in cp scheduling */ + unsigned int vmid; +}; + +/** + * struct queue + * + * @list: Queue linked list. + * + * @mqd: The queue MQD. + * + * @mqd_mem_obj: The MQD local gpu memory object. + * + * @gart_mqd_addr: The MQD gart mc address. + * + * @properties: The queue properties. + * + * @mec: Used only in no cp scheduling mode and identifies to micro engine id + * that the queue should be execute on. + * + * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id. + * + * @queue: Used only in no cp scheduliong mode and identifies the queue's slot. + * + * @process: The kfd process that created this queue. + * + * @device: The kfd device that created this queue. + * + * This structure represents user mode compute queues. + * It contains all the necessary data to handle such queues. + * + */ + +struct queue { + struct list_head list; + void *mqd; + struct kfd_mem_obj *mqd_mem_obj; + uint64_t gart_mqd_addr; + struct queue_properties properties; + + uint32_t mec; + uint32_t pipe; + uint32_t queue; + + struct kfd_process *process; + struct kfd_dev *device; +}; + +/* + * Please read the kfd_mqd_manager.h description. + */ +enum KFD_MQD_TYPE { + KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */ + KFD_MQD_TYPE_CIK_HIQ, /* for hiq */ + KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */ + KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */ + KFD_MQD_TYPE_MAX +}; + +struct scheduling_resources { + unsigned int vmid_mask; + enum kfd_queue_type type; + uint64_t queue_mask; + uint64_t gws_mask; + uint32_t oac_mask; + uint32_t gds_heap_base; + uint32_t gds_heap_size; +}; + +struct process_queue_manager { + /* data */ + struct kfd_process *process; + unsigned int num_concurrent_processes; + struct list_head queues; + unsigned long *queue_slot_bitmap; +}; + +struct qcm_process_device { + /* The Device Queue Manager that owns this data */ + struct device_queue_manager *dqm; + struct process_queue_manager *pqm; + /* Device Queue Manager lock */ + struct mutex *lock; + /* Queues list */ + struct list_head queues_list; + struct list_head priv_queue_list; + + unsigned int queue_count; + unsigned int vmid; + bool is_debug; + /* + * All the memory management data should be here too + */ + uint64_t gds_context_area; + uint32_t sh_mem_config; + uint32_t sh_mem_bases; + uint32_t sh_mem_ape1_base; + uint32_t sh_mem_ape1_limit; + uint32_t page_table_base; + uint32_t gds_size; + uint32_t num_gws; + uint32_t num_oac; +}; + +/* Data that is per-process-per device. */ +struct kfd_process_device { + /* + * List of all per-device data for a process. + * Starts from kfd_process.per_device_data. + */ + struct list_head per_device_list; + + /* The device that owns this data. */ + struct kfd_dev *dev; + + + /* per-process-per device QCM data structure */ + struct qcm_process_device qpd; + + /*Apertures*/ + uint64_t lds_base; + uint64_t lds_limit; + uint64_t gpuvm_base; + uint64_t gpuvm_limit; + uint64_t scratch_base; + uint64_t scratch_limit; + + /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ + bool bound; +}; + +#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) + +/* Process data */ +struct kfd_process { + /* + * kfd_process are stored in an mm_struct*->kfd_process* + * hash table (kfd_processes in kfd_process.c) + */ + struct hlist_node kfd_processes; + + struct mm_struct *mm; + + struct mutex mutex; + + /* + * In any process, the thread that started main() is the lead + * thread and outlives the rest. + * It is here because amd_iommu_bind_pasid wants a task_struct. + */ + struct task_struct *lead_thread; + + /* We want to receive a notification when the mm_struct is destroyed */ + struct mmu_notifier mmu_notifier; + + /* Use for delayed freeing of kfd_process structure */ + struct rcu_head rcu; + + unsigned int pasid; + + /* + * List of kfd_process_device structures, + * one for each device the process is using. + */ + struct list_head per_device_data; + + struct process_queue_manager pqm; + + /* The process's queues. */ + size_t queue_array_size; + + /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */ + struct kfd_queue **queues; + + unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; + + /*Is the user space process 32 bit?*/ + bool is_32bit_user_mode; +}; + +void kfd_process_create_wq(void); +void kfd_process_destroy_wq(void); +struct kfd_process *kfd_create_process(const struct task_struct *); +struct kfd_process *kfd_get_process(const struct task_struct *); + +struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, + struct kfd_process *p); +void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid); +struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, + struct kfd_process *p, + int create_pdd); + +/* Process device data iterator */ +struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p); +struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, + struct kfd_process_device *pdd); +bool kfd_has_process_device_data(struct kfd_process *p); + +/* PASIDs */ +int kfd_pasid_init(void); +void kfd_pasid_exit(void); +bool kfd_set_pasid_limit(unsigned int new_limit); +unsigned int kfd_get_pasid_limit(void); +unsigned int kfd_pasid_alloc(void); +void kfd_pasid_free(unsigned int pasid); + +/* Doorbells */ +void kfd_doorbell_init(struct kfd_dev *kfd); +int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); +u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, + unsigned int *doorbell_off); +void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); +u32 read_kernel_doorbell(u32 __iomem *db); +void write_kernel_doorbell(u32 __iomem *db, u32 value); +unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, + struct kfd_process *process, + unsigned int queue_id); + +extern struct device *kfd_device; + +/* Topology */ +int kfd_topology_init(void); +void kfd_topology_shutdown(void); +int kfd_topology_add_device(struct kfd_dev *gpu); +int kfd_topology_remove_device(struct kfd_dev *gpu); +struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); +struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); +struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); + +/* Interrupts */ +int kfd_interrupt_init(struct kfd_dev *dev); +void kfd_interrupt_exit(struct kfd_dev *dev); +void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); +bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); + +/* Power Management */ +void kgd2kfd_suspend(struct kfd_dev *kfd); +int kgd2kfd_resume(struct kfd_dev *kfd); + +/* amdkfd Apertures */ +int kfd_init_apertures(struct kfd_process *process); + +/* Queue Context Management */ +inline uint32_t lower_32(uint64_t x); +inline uint32_t upper_32(uint64_t x); + +int init_queue(struct queue **q, struct queue_properties properties); +void uninit_queue(struct queue *q); +void print_queue_properties(struct queue_properties *q); +void print_queue(struct queue *q); + +struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); +struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); +void device_queue_manager_uninit(struct device_queue_manager *dqm); +struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, + enum kfd_queue_type type); +void kernel_queue_uninit(struct kernel_queue *kq); + +/* Process Queue Manager */ +struct process_queue_node { + struct queue *q; + struct kernel_queue *kq; + struct list_head process_queue_list; +}; + +int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p); +void pqm_uninit(struct process_queue_manager *pqm); +int pqm_create_queue(struct process_queue_manager *pqm, + struct kfd_dev *dev, + struct file *f, + struct queue_properties *properties, + unsigned int flags, + enum kfd_queue_type type, + unsigned int *qid); +int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); +int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, + struct queue_properties *p); + +/* Packet Manager */ + +#define KFD_HIQ_TIMEOUT (500) + +#define KFD_FENCE_COMPLETED (100) +#define KFD_FENCE_INIT (10) +#define KFD_UNMAP_LATENCY (150) + +struct packet_manager { + struct device_queue_manager *dqm; + struct kernel_queue *priv_queue; + struct mutex lock; + bool allocated; + struct kfd_mem_obj *ib_buffer_obj; +}; + +int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); +void pm_uninit(struct packet_manager *pm); +int pm_send_set_resources(struct packet_manager *pm, + struct scheduling_resources *res); +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues); +int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, + uint32_t fence_value); + +int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, + enum kfd_preempt_type_filter mode, + uint32_t filter_param, bool reset, + unsigned int sdma_engine); + +void pm_release_ib(struct packet_manager *pm); + +uint64_t kfd_get_number_elems(struct kfd_dev *kfd); +phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, + struct kfd_process *process); + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c new file mode 100644 index 0000000000000000000000000000000000000000..b85eb0b830b41d5d894efebb2a3e6fab83ca7552 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -0,0 +1,410 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +struct mm_struct; + +#include "kfd_priv.h" + +/* + * Initial size for the array of queues. + * The allocated size is doubled each time + * it is exceeded up to MAX_PROCESS_QUEUES. + */ +#define INITIAL_QUEUE_ARRAY_SIZE 16 + +/* + * List of struct kfd_process (field kfd_process). + * Unique/indexed by mm_struct* + */ +#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ +static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); +static DEFINE_MUTEX(kfd_processes_mutex); + +DEFINE_STATIC_SRCU(kfd_processes_srcu); + +static struct workqueue_struct *kfd_process_wq; + +struct kfd_process_release_work { + struct work_struct kfd_work; + struct kfd_process *p; +}; + +static struct kfd_process *find_process(const struct task_struct *thread); +static struct kfd_process *create_process(const struct task_struct *thread); + +void kfd_process_create_wq(void) +{ + if (!kfd_process_wq) + kfd_process_wq = create_workqueue("kfd_process_wq"); +} + +void kfd_process_destroy_wq(void) +{ + if (kfd_process_wq) { + flush_workqueue(kfd_process_wq); + destroy_workqueue(kfd_process_wq); + kfd_process_wq = NULL; + } +} + +struct kfd_process *kfd_create_process(const struct task_struct *thread) +{ + struct kfd_process *process; + + BUG_ON(!kfd_process_wq); + + if (thread->mm == NULL) + return ERR_PTR(-EINVAL); + + /* Only the pthreads threading model is supported. */ + if (thread->group_leader->mm != thread->mm) + return ERR_PTR(-EINVAL); + + /* Take mmap_sem because we call __mmu_notifier_register inside */ + down_write(&thread->mm->mmap_sem); + + /* + * take kfd processes mutex before starting of process creation + * so there won't be a case where two threads of the same process + * create two kfd_process structures + */ + mutex_lock(&kfd_processes_mutex); + + /* A prior open of /dev/kfd could have already created the process. */ + process = find_process(thread); + if (process) + pr_debug("kfd: process already found\n"); + + if (!process) + process = create_process(thread); + + mutex_unlock(&kfd_processes_mutex); + + up_write(&thread->mm->mmap_sem); + + return process; +} + +struct kfd_process *kfd_get_process(const struct task_struct *thread) +{ + struct kfd_process *process; + + if (thread->mm == NULL) + return ERR_PTR(-EINVAL); + + /* Only the pthreads threading model is supported. */ + if (thread->group_leader->mm != thread->mm) + return ERR_PTR(-EINVAL); + + process = find_process(thread); + + return process; +} + +static struct kfd_process *find_process_by_mm(const struct mm_struct *mm) +{ + struct kfd_process *process; + + hash_for_each_possible_rcu(kfd_processes_table, process, + kfd_processes, (uintptr_t)mm) + if (process->mm == mm) + return process; + + return NULL; +} + +static struct kfd_process *find_process(const struct task_struct *thread) +{ + struct kfd_process *p; + int idx; + + idx = srcu_read_lock(&kfd_processes_srcu); + p = find_process_by_mm(thread->mm); + srcu_read_unlock(&kfd_processes_srcu, idx); + + return p; +} + +static void kfd_process_wq_release(struct work_struct *work) +{ + struct kfd_process_release_work *my_work; + struct kfd_process_device *pdd, *temp; + struct kfd_process *p; + + my_work = (struct kfd_process_release_work *) work; + + p = my_work->p; + + mutex_lock(&p->mutex); + + list_for_each_entry_safe(pdd, temp, &p->per_device_data, + per_device_list) { + amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); + list_del(&pdd->per_device_list); + + kfree(pdd); + } + + kfd_pasid_free(p->pasid); + + mutex_unlock(&p->mutex); + + mutex_destroy(&p->mutex); + + kfree(p->queues); + + kfree(p); + + kfree((void *)work); +} + +static void kfd_process_destroy_delayed(struct rcu_head *rcu) +{ + struct kfd_process_release_work *work; + struct kfd_process *p; + + BUG_ON(!kfd_process_wq); + + p = container_of(rcu, struct kfd_process, rcu); + BUG_ON(atomic_read(&p->mm->mm_count) <= 0); + + mmdrop(p->mm); + + work = (struct kfd_process_release_work *) + kmalloc(sizeof(struct kfd_process_release_work), GFP_ATOMIC); + + if (work) { + INIT_WORK((struct work_struct *) work, kfd_process_wq_release); + work->p = p; + queue_work(kfd_process_wq, (struct work_struct *) work); + } +} + +static void kfd_process_notifier_release(struct mmu_notifier *mn, + struct mm_struct *mm) +{ + struct kfd_process *p; + + /* + * The kfd_process structure can not be free because the + * mmu_notifier srcu is read locked + */ + p = container_of(mn, struct kfd_process, mmu_notifier); + BUG_ON(p->mm != mm); + + mutex_lock(&kfd_processes_mutex); + hash_del_rcu(&p->kfd_processes); + mutex_unlock(&kfd_processes_mutex); + synchronize_srcu(&kfd_processes_srcu); + + mutex_lock(&p->mutex); + + /* In case our notifier is called before IOMMU notifier */ + pqm_uninit(&p->pqm); + + mutex_unlock(&p->mutex); + + /* + * Because we drop mm_count inside kfd_process_destroy_delayed + * and because the mmu_notifier_unregister function also drop + * mm_count we need to take an extra count here. + */ + atomic_inc(&p->mm->mm_count); + mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm); + mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed); +} + +static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { + .release = kfd_process_notifier_release, +}; + +static struct kfd_process *create_process(const struct task_struct *thread) +{ + struct kfd_process *process; + int err = -ENOMEM; + + process = kzalloc(sizeof(*process), GFP_KERNEL); + + if (!process) + goto err_alloc_process; + + process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE, + sizeof(process->queues[0]), GFP_KERNEL); + if (!process->queues) + goto err_alloc_queues; + + process->pasid = kfd_pasid_alloc(); + if (process->pasid == 0) + goto err_alloc_pasid; + + mutex_init(&process->mutex); + + process->mm = thread->mm; + + /* register notifier */ + process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; + err = __mmu_notifier_register(&process->mmu_notifier, process->mm); + if (err) + goto err_mmu_notifier; + + hash_add_rcu(kfd_processes_table, &process->kfd_processes, + (uintptr_t)process->mm); + + process->lead_thread = thread->group_leader; + + process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE; + + INIT_LIST_HEAD(&process->per_device_data); + + err = pqm_init(&process->pqm, process); + if (err != 0) + goto err_process_pqm_init; + + return process; + +err_process_pqm_init: + hash_del_rcu(&process->kfd_processes); + synchronize_rcu(); + mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm); +err_mmu_notifier: + kfd_pasid_free(process->pasid); +err_alloc_pasid: + kfree(process->queues); +err_alloc_queues: + kfree(process); +err_alloc_process: + return ERR_PTR(err); +} + +struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, + struct kfd_process *p, + int create_pdd) +{ + struct kfd_process_device *pdd = NULL; + + list_for_each_entry(pdd, &p->per_device_data, per_device_list) + if (pdd->dev == dev) + return pdd; + + if (create_pdd) { + pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); + if (pdd != NULL) { + pdd->dev = dev; + INIT_LIST_HEAD(&pdd->qpd.queues_list); + INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); + pdd->qpd.dqm = dev->dqm; + list_add(&pdd->per_device_list, &p->per_device_data); + } + } + + return pdd; +} + +/* + * Direct the IOMMU to bind the process (specifically the pasid->mm) + * to the device. + * Unbinding occurs when the process dies or the device is removed. + * + * Assumes that the process lock is held. + */ +struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, + struct kfd_process *p) +{ + struct kfd_process_device *pdd = kfd_get_process_device_data(dev, p, 1); + int err; + + if (pdd == NULL) + return ERR_PTR(-ENOMEM); + + if (pdd->bound) + return pdd; + + err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); + if (err < 0) + return ERR_PTR(err); + + pdd->bound = true; + + return pdd; +} + +void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) +{ + struct kfd_process *p; + struct kfd_process_device *pdd; + int idx, i; + + BUG_ON(dev == NULL); + + idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes) + if (p->pasid == pasid) + break; + + srcu_read_unlock(&kfd_processes_srcu, idx); + + BUG_ON(p->pasid != pasid); + + mutex_lock(&p->mutex); + + pqm_uninit(&p->pqm); + + pdd = kfd_get_process_device_data(dev, p, 0); + + /* + * Just mark pdd as unbound, because we still need it to call + * amd_iommu_unbind_pasid() in when the process exits. + * We don't call amd_iommu_unbind_pasid() here + * because the IOMMU called us. + */ + if (pdd) + pdd->bound = false; + + mutex_unlock(&p->mutex); +} + +struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p) +{ + return list_first_entry(&p->per_device_data, + struct kfd_process_device, + per_device_list); +} + +struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, + struct kfd_process_device *pdd) +{ + if (list_is_last(&pdd->per_device_list, &p->per_device_data)) + return NULL; + return list_next_entry(pdd, per_device_list); +} + +bool kfd_has_process_device_data(struct kfd_process *p) +{ + return !(list_empty(&p->per_device_data)); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c new file mode 100644 index 0000000000000000000000000000000000000000..47526780d736ced5470bfb2822a3892f879b0320 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -0,0 +1,343 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include "kfd_device_queue_manager.h" +#include "kfd_priv.h" +#include "kfd_kernel_queue.h" + +static inline struct process_queue_node *get_queue_by_qid( + struct process_queue_manager *pqm, unsigned int qid) +{ + struct process_queue_node *pqn; + + BUG_ON(!pqm); + + list_for_each_entry(pqn, &pqm->queues, process_queue_list) { + if (pqn->q && pqn->q->properties.queue_id == qid) + return pqn; + if (pqn->kq && pqn->kq->queue->properties.queue_id == qid) + return pqn; + } + + return NULL; +} + +static int find_available_queue_slot(struct process_queue_manager *pqm, + unsigned int *qid) +{ + unsigned long found; + + BUG_ON(!pqm || !qid); + + pr_debug("kfd: in %s\n", __func__); + + found = find_first_zero_bit(pqm->queue_slot_bitmap, + max_num_of_queues_per_process); + + pr_debug("kfd: the new slot id %lu\n", found); + + if (found >= max_num_of_queues_per_process) { + pr_info("amdkfd: Can not open more queues for process with pasid %d\n", + pqm->process->pasid); + return -ENOMEM; + } + + set_bit(found, pqm->queue_slot_bitmap); + *qid = found; + + return 0; +} + +int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p) +{ + BUG_ON(!pqm); + + INIT_LIST_HEAD(&pqm->queues); + pqm->queue_slot_bitmap = + kzalloc(DIV_ROUND_UP(max_num_of_queues_per_process, + BITS_PER_BYTE), GFP_KERNEL); + if (pqm->queue_slot_bitmap == NULL) + return -ENOMEM; + pqm->process = p; + + return 0; +} + +void pqm_uninit(struct process_queue_manager *pqm) +{ + int retval; + struct process_queue_node *pqn, *next; + + BUG_ON(!pqm); + + pr_debug("In func %s\n", __func__); + + list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) { + retval = pqm_destroy_queue( + pqm, + (pqn->q != NULL) ? + pqn->q->properties.queue_id : + pqn->kq->queue->properties.queue_id); + + if (retval != 0) { + pr_err("kfd: failed to destroy queue\n"); + return; + } + } + kfree(pqm->queue_slot_bitmap); + pqm->queue_slot_bitmap = NULL; +} + +static int create_cp_queue(struct process_queue_manager *pqm, + struct kfd_dev *dev, struct queue **q, + struct queue_properties *q_properties, + struct file *f, unsigned int qid) +{ + int retval; + + retval = 0; + + /* Doorbell initialized in user space*/ + q_properties->doorbell_ptr = NULL; + + q_properties->doorbell_off = + kfd_queue_id_to_doorbell(dev, pqm->process, qid); + + /* let DQM handle it*/ + q_properties->vmid = 0; + q_properties->queue_id = qid; + q_properties->type = KFD_QUEUE_TYPE_COMPUTE; + + retval = init_queue(q, *q_properties); + if (retval != 0) + goto err_init_queue; + + (*q)->device = dev; + (*q)->process = pqm->process; + + pr_debug("kfd: PQM After init queue"); + + return retval; + +err_init_queue: + return retval; +} + +int pqm_create_queue(struct process_queue_manager *pqm, + struct kfd_dev *dev, + struct file *f, + struct queue_properties *properties, + unsigned int flags, + enum kfd_queue_type type, + unsigned int *qid) +{ + int retval; + struct kfd_process_device *pdd; + struct queue_properties q_properties; + struct queue *q; + struct process_queue_node *pqn; + struct kernel_queue *kq; + + BUG_ON(!pqm || !dev || !properties || !qid); + + memset(&q_properties, 0, sizeof(struct queue_properties)); + memcpy(&q_properties, properties, sizeof(struct queue_properties)); + q = NULL; + kq = NULL; + + pdd = kfd_get_process_device_data(dev, pqm->process, 1); + BUG_ON(!pdd); + + retval = find_available_queue_slot(pqm, qid); + if (retval != 0) + return retval; + + if (list_empty(&pqm->queues)) { + pdd->qpd.pqm = pqm; + dev->dqm->register_process(dev->dqm, &pdd->qpd); + } + + pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL); + if (!pqn) { + retval = -ENOMEM; + goto err_allocate_pqn; + } + + switch (type) { + case KFD_QUEUE_TYPE_COMPUTE: + /* check if there is over subscription */ + if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) && + ((dev->dqm->processes_count >= VMID_PER_DEVICE) || + (dev->dqm->queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE))) { + pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n"); + retval = -EPERM; + goto err_create_queue; + } + + retval = create_cp_queue(pqm, dev, &q, &q_properties, f, *qid); + if (retval != 0) + goto err_create_queue; + pqn->q = q; + pqn->kq = NULL; + retval = dev->dqm->create_queue(dev->dqm, q, &pdd->qpd, + &q->properties.vmid); + print_queue(q); + break; + case KFD_QUEUE_TYPE_DIQ: + kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ); + if (kq == NULL) { + retval = -ENOMEM; + goto err_create_queue; + } + kq->queue->properties.queue_id = *qid; + pqn->kq = kq; + pqn->q = NULL; + retval = dev->dqm->create_kernel_queue(dev->dqm, kq, &pdd->qpd); + break; + default: + BUG(); + break; + } + + if (retval != 0) { + pr_err("kfd: error dqm create queue\n"); + goto err_create_queue; + } + + pr_debug("kfd: PQM After DQM create queue\n"); + + list_add(&pqn->process_queue_list, &pqm->queues); + + if (q) { + *properties = q->properties; + pr_debug("kfd: PQM done creating queue\n"); + print_queue_properties(properties); + } + + return retval; + +err_create_queue: + kfree(pqn); +err_allocate_pqn: + clear_bit(*qid, pqm->queue_slot_bitmap); + return retval; +} + +int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) +{ + struct process_queue_node *pqn; + struct kfd_process_device *pdd; + struct device_queue_manager *dqm; + struct kfd_dev *dev; + int retval; + + dqm = NULL; + + BUG_ON(!pqm); + retval = 0; + + pr_debug("kfd: In Func %s\n", __func__); + + pqn = get_queue_by_qid(pqm, qid); + if (pqn == NULL) { + pr_err("kfd: queue id does not match any known queue\n"); + return -EINVAL; + } + + dev = NULL; + if (pqn->kq) + dev = pqn->kq->dev; + if (pqn->q) + dev = pqn->q->device; + BUG_ON(!dev); + + pdd = kfd_get_process_device_data(dev, pqm->process, 1); + BUG_ON(!pdd); + + if (pqn->kq) { + /* destroy kernel queue (DIQ) */ + dqm = pqn->kq->dev->dqm; + dqm->destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd); + kernel_queue_uninit(pqn->kq); + } + + if (pqn->q) { + dqm = pqn->q->device->dqm; + retval = dqm->destroy_queue(dqm, &pdd->qpd, pqn->q); + if (retval != 0) + return retval; + + uninit_queue(pqn->q); + } + + list_del(&pqn->process_queue_list); + kfree(pqn); + clear_bit(qid, pqm->queue_slot_bitmap); + + if (list_empty(&pqm->queues)) + dqm->unregister_process(dqm, &pdd->qpd); + + return retval; +} + +int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, + struct queue_properties *p) +{ + int retval; + struct process_queue_node *pqn; + + BUG_ON(!pqm); + + pqn = get_queue_by_qid(pqm, qid); + BUG_ON(!pqn); + + pqn->q->properties.queue_address = p->queue_address; + pqn->q->properties.queue_size = p->queue_size; + pqn->q->properties.queue_percent = p->queue_percent; + pqn->q->properties.priority = p->priority; + + retval = pqn->q->device->dqm->update_queue(pqn->q->device->dqm, pqn->q); + if (retval != 0) + return retval; + + return 0; +} + +static __attribute__((unused)) struct kernel_queue *pqm_get_kernel_queue( + struct process_queue_manager *pqm, + unsigned int qid) +{ + struct process_queue_node *pqn; + + BUG_ON(!pqm); + + pqn = get_queue_by_qid(pqm, qid); + if (pqn && pqn->kq) + return pqn->kq; + + return NULL; +} + + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c new file mode 100644 index 0000000000000000000000000000000000000000..9a0c90b0702eef4ced8ff7cc9ec2ab2cb23b57c8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -0,0 +1,85 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include "kfd_priv.h" + +void print_queue_properties(struct queue_properties *q) +{ + if (!q) + return; + + pr_debug("Printing queue properties:\n"); + pr_debug("Queue Type: %u\n", q->type); + pr_debug("Queue Size: %llu\n", q->queue_size); + pr_debug("Queue percent: %u\n", q->queue_percent); + pr_debug("Queue Address: 0x%llX\n", q->queue_address); + pr_debug("Queue Id: %u\n", q->queue_id); + pr_debug("Queue Process Vmid: %u\n", q->vmid); + pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr); + pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr); + pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr); + pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off); +} + +void print_queue(struct queue *q) +{ + if (!q) + return; + pr_debug("Printing queue:\n"); + pr_debug("Queue Type: %u\n", q->properties.type); + pr_debug("Queue Size: %llu\n", q->properties.queue_size); + pr_debug("Queue percent: %u\n", q->properties.queue_percent); + pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address); + pr_debug("Queue Id: %u\n", q->properties.queue_id); + pr_debug("Queue Process Vmid: %u\n", q->properties.vmid); + pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr); + pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr); + pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr); + pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off); + pr_debug("Queue MQD Address: 0x%p\n", q->mqd); + pr_debug("Queue MQD Gart: 0x%llX\n", q->gart_mqd_addr); + pr_debug("Queue Process Address: 0x%p\n", q->process); + pr_debug("Queue Device Address: 0x%p\n", q->device); +} + +int init_queue(struct queue **q, struct queue_properties properties) +{ + struct queue *tmp; + + BUG_ON(!q); + + tmp = kzalloc(sizeof(struct queue), GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + memcpy(&tmp->properties, &properties, sizeof(struct queue_properties)); + + *q = tmp; + return 0; +} + +void uninit_queue(struct queue *q) +{ + kfree(q); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c new file mode 100644 index 0000000000000000000000000000000000000000..5733e2859e8aabbc361ce821f5e337c0008dd6a3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -0,0 +1,1235 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "kfd_priv.h" +#include "kfd_crat.h" +#include "kfd_topology.h" + +static struct list_head topology_device_list; +static int topology_crat_parsed; +static struct kfd_system_properties sys_props; + +static DECLARE_RWSEM(topology_lock); + +struct kfd_dev *kfd_device_by_id(uint32_t gpu_id) +{ + struct kfd_topology_device *top_dev; + struct kfd_dev *device = NULL; + + down_read(&topology_lock); + + list_for_each_entry(top_dev, &topology_device_list, list) + if (top_dev->gpu_id == gpu_id) { + device = top_dev->gpu; + break; + } + + up_read(&topology_lock); + + return device; +} + +struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev) +{ + struct kfd_topology_device *top_dev; + struct kfd_dev *device = NULL; + + down_read(&topology_lock); + + list_for_each_entry(top_dev, &topology_device_list, list) + if (top_dev->gpu->pdev == pdev) { + device = top_dev->gpu; + break; + } + + up_read(&topology_lock); + + return device; +} + +static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size) +{ + struct acpi_table_header *crat_table; + acpi_status status; + + if (!size) + return -EINVAL; + + /* + * Fetch the CRAT table from ACPI + */ + status = acpi_get_table(CRAT_SIGNATURE, 0, &crat_table); + if (status == AE_NOT_FOUND) { + pr_warn("CRAT table not found\n"); + return -ENODATA; + } else if (ACPI_FAILURE(status)) { + const char *err = acpi_format_exception(status); + + pr_err("CRAT table error: %s\n", err); + return -EINVAL; + } + + if (*size >= crat_table->length && crat_image != NULL) + memcpy(crat_image, crat_table, crat_table->length); + + *size = crat_table->length; + + return 0; +} + +static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, + struct crat_subtype_computeunit *cu) +{ + BUG_ON(!dev); + BUG_ON(!cu); + + dev->node_props.cpu_cores_count = cu->num_cpu_cores; + dev->node_props.cpu_core_id_base = cu->processor_id_low; + if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT) + dev->node_props.capability |= HSA_CAP_ATS_PRESENT; + + pr_info("CU CPU: cores=%d id_base=%d\n", cu->num_cpu_cores, + cu->processor_id_low); +} + +static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev, + struct crat_subtype_computeunit *cu) +{ + BUG_ON(!dev); + BUG_ON(!cu); + + dev->node_props.simd_id_base = cu->processor_id_low; + dev->node_props.simd_count = cu->num_simd_cores; + dev->node_props.lds_size_in_kb = cu->lds_size_in_kb; + dev->node_props.max_waves_per_simd = cu->max_waves_simd; + dev->node_props.wave_front_size = cu->wave_front_size; + dev->node_props.mem_banks_count = cu->num_banks; + dev->node_props.array_count = cu->num_arrays; + dev->node_props.cu_per_simd_array = cu->num_cu_per_array; + dev->node_props.simd_per_cu = cu->num_simd_per_cu; + dev->node_props.max_slots_scratch_cu = cu->max_slots_scatch_cu; + if (cu->hsa_capability & CRAT_CU_FLAGS_HOT_PLUGGABLE) + dev->node_props.capability |= HSA_CAP_HOT_PLUGGABLE; + pr_info("CU GPU: simds=%d id_base=%d\n", cu->num_simd_cores, + cu->processor_id_low); +} + +/* kfd_parse_subtype_cu is called when the topology mutex is already acquired */ +static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu) +{ + struct kfd_topology_device *dev; + int i = 0; + + BUG_ON(!cu); + + pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n", + cu->proximity_domain, cu->hsa_capability); + list_for_each_entry(dev, &topology_device_list, list) { + if (cu->proximity_domain == i) { + if (cu->flags & CRAT_CU_FLAGS_CPU_PRESENT) + kfd_populated_cu_info_cpu(dev, cu); + + if (cu->flags & CRAT_CU_FLAGS_GPU_PRESENT) + kfd_populated_cu_info_gpu(dev, cu); + break; + } + i++; + } + + return 0; +} + +/* + * kfd_parse_subtype_mem is called when the topology mutex is + * already acquired + */ +static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem) +{ + struct kfd_mem_properties *props; + struct kfd_topology_device *dev; + int i = 0; + + BUG_ON(!mem); + + pr_info("Found memory entry in CRAT table with proximity_domain=%d\n", + mem->promixity_domain); + list_for_each_entry(dev, &topology_device_list, list) { + if (mem->promixity_domain == i) { + props = kfd_alloc_struct(props); + if (props == NULL) + return -ENOMEM; + + if (dev->node_props.cpu_cores_count == 0) + props->heap_type = HSA_MEM_HEAP_TYPE_FB_PRIVATE; + else + props->heap_type = HSA_MEM_HEAP_TYPE_SYSTEM; + + if (mem->flags & CRAT_MEM_FLAGS_HOT_PLUGGABLE) + props->flags |= HSA_MEM_FLAGS_HOT_PLUGGABLE; + if (mem->flags & CRAT_MEM_FLAGS_NON_VOLATILE) + props->flags |= HSA_MEM_FLAGS_NON_VOLATILE; + + props->size_in_bytes = + ((uint64_t)mem->length_high << 32) + + mem->length_low; + props->width = mem->width; + + dev->mem_bank_count++; + list_add_tail(&props->list, &dev->mem_props); + + break; + } + i++; + } + + return 0; +} + +/* + * kfd_parse_subtype_cache is called when the topology mutex + * is already acquired + */ +static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache) +{ + struct kfd_cache_properties *props; + struct kfd_topology_device *dev; + uint32_t id; + + BUG_ON(!cache); + + id = cache->processor_id_low; + + pr_info("Found cache entry in CRAT table with processor_id=%d\n", id); + list_for_each_entry(dev, &topology_device_list, list) + if (id == dev->node_props.cpu_core_id_base || + id == dev->node_props.simd_id_base) { + props = kfd_alloc_struct(props); + if (props == NULL) + return -ENOMEM; + + props->processor_id_low = id; + props->cache_level = cache->cache_level; + props->cache_size = cache->cache_size; + props->cacheline_size = cache->cache_line_size; + props->cachelines_per_tag = cache->lines_per_tag; + props->cache_assoc = cache->associativity; + props->cache_latency = cache->cache_latency; + + if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE) + props->cache_type |= HSA_CACHE_TYPE_DATA; + if (cache->flags & CRAT_CACHE_FLAGS_INST_CACHE) + props->cache_type |= HSA_CACHE_TYPE_INSTRUCTION; + if (cache->flags & CRAT_CACHE_FLAGS_CPU_CACHE) + props->cache_type |= HSA_CACHE_TYPE_CPU; + if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE) + props->cache_type |= HSA_CACHE_TYPE_HSACU; + + dev->cache_count++; + dev->node_props.caches_count++; + list_add_tail(&props->list, &dev->cache_props); + + break; + } + + return 0; +} + +/* + * kfd_parse_subtype_iolink is called when the topology mutex + * is already acquired + */ +static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink) +{ + struct kfd_iolink_properties *props; + struct kfd_topology_device *dev; + uint32_t i = 0; + uint32_t id_from; + uint32_t id_to; + + BUG_ON(!iolink); + + id_from = iolink->proximity_domain_from; + id_to = iolink->proximity_domain_to; + + pr_info("Found IO link entry in CRAT table with id_from=%d\n", id_from); + list_for_each_entry(dev, &topology_device_list, list) { + if (id_from == i) { + props = kfd_alloc_struct(props); + if (props == NULL) + return -ENOMEM; + + props->node_from = id_from; + props->node_to = id_to; + props->ver_maj = iolink->version_major; + props->ver_min = iolink->version_minor; + + /* + * weight factor (derived from CDIR), currently always 1 + */ + props->weight = 1; + + props->min_latency = iolink->minimum_latency; + props->max_latency = iolink->maximum_latency; + props->min_bandwidth = iolink->minimum_bandwidth_mbs; + props->max_bandwidth = iolink->maximum_bandwidth_mbs; + props->rec_transfer_size = + iolink->recommended_transfer_size; + + dev->io_link_count++; + dev->node_props.io_links_count++; + list_add_tail(&props->list, &dev->io_link_props); + + break; + } + i++; + } + + return 0; +} + +static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr) +{ + struct crat_subtype_computeunit *cu; + struct crat_subtype_memory *mem; + struct crat_subtype_cache *cache; + struct crat_subtype_iolink *iolink; + int ret = 0; + + BUG_ON(!sub_type_hdr); + + switch (sub_type_hdr->type) { + case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY: + cu = (struct crat_subtype_computeunit *)sub_type_hdr; + ret = kfd_parse_subtype_cu(cu); + break; + case CRAT_SUBTYPE_MEMORY_AFFINITY: + mem = (struct crat_subtype_memory *)sub_type_hdr; + ret = kfd_parse_subtype_mem(mem); + break; + case CRAT_SUBTYPE_CACHE_AFFINITY: + cache = (struct crat_subtype_cache *)sub_type_hdr; + ret = kfd_parse_subtype_cache(cache); + break; + case CRAT_SUBTYPE_TLB_AFFINITY: + /* + * For now, nothing to do here + */ + pr_info("Found TLB entry in CRAT table (not processing)\n"); + break; + case CRAT_SUBTYPE_CCOMPUTE_AFFINITY: + /* + * For now, nothing to do here + */ + pr_info("Found CCOMPUTE entry in CRAT table (not processing)\n"); + break; + case CRAT_SUBTYPE_IOLINK_AFFINITY: + iolink = (struct crat_subtype_iolink *)sub_type_hdr; + ret = kfd_parse_subtype_iolink(iolink); + break; + default: + pr_warn("Unknown subtype (%d) in CRAT\n", + sub_type_hdr->type); + } + + return ret; +} + +static void kfd_release_topology_device(struct kfd_topology_device *dev) +{ + struct kfd_mem_properties *mem; + struct kfd_cache_properties *cache; + struct kfd_iolink_properties *iolink; + + BUG_ON(!dev); + + list_del(&dev->list); + + while (dev->mem_props.next != &dev->mem_props) { + mem = container_of(dev->mem_props.next, + struct kfd_mem_properties, list); + list_del(&mem->list); + kfree(mem); + } + + while (dev->cache_props.next != &dev->cache_props) { + cache = container_of(dev->cache_props.next, + struct kfd_cache_properties, list); + list_del(&cache->list); + kfree(cache); + } + + while (dev->io_link_props.next != &dev->io_link_props) { + iolink = container_of(dev->io_link_props.next, + struct kfd_iolink_properties, list); + list_del(&iolink->list); + kfree(iolink); + } + + kfree(dev); + + sys_props.num_devices--; +} + +static void kfd_release_live_view(void) +{ + struct kfd_topology_device *dev; + + while (topology_device_list.next != &topology_device_list) { + dev = container_of(topology_device_list.next, + struct kfd_topology_device, list); + kfd_release_topology_device(dev); +} + + memset(&sys_props, 0, sizeof(sys_props)); +} + +static struct kfd_topology_device *kfd_create_topology_device(void) +{ + struct kfd_topology_device *dev; + + dev = kfd_alloc_struct(dev); + if (dev == NULL) { + pr_err("No memory to allocate a topology device"); + return NULL; + } + + INIT_LIST_HEAD(&dev->mem_props); + INIT_LIST_HEAD(&dev->cache_props); + INIT_LIST_HEAD(&dev->io_link_props); + + list_add_tail(&dev->list, &topology_device_list); + sys_props.num_devices++; + + return dev; +} + +static int kfd_parse_crat_table(void *crat_image) +{ + struct kfd_topology_device *top_dev; + struct crat_subtype_generic *sub_type_hdr; + uint16_t node_id; + int ret; + struct crat_header *crat_table = (struct crat_header *)crat_image; + uint16_t num_nodes; + uint32_t image_len; + + if (!crat_image) + return -EINVAL; + + num_nodes = crat_table->num_domains; + image_len = crat_table->length; + + pr_info("Parsing CRAT table with %d nodes\n", num_nodes); + + for (node_id = 0; node_id < num_nodes; node_id++) { + top_dev = kfd_create_topology_device(); + if (!top_dev) { + kfd_release_live_view(); + return -ENOMEM; + } + } + + sys_props.platform_id = + (*((uint64_t *)crat_table->oem_id)) & CRAT_OEMID_64BIT_MASK; + sys_props.platform_oem = *((uint64_t *)crat_table->oem_table_id); + sys_props.platform_rev = crat_table->revision; + + sub_type_hdr = (struct crat_subtype_generic *)(crat_table+1); + while ((char *)sub_type_hdr + sizeof(struct crat_subtype_generic) < + ((char *)crat_image) + image_len) { + if (sub_type_hdr->flags & CRAT_SUBTYPE_FLAGS_ENABLED) { + ret = kfd_parse_subtype(sub_type_hdr); + if (ret != 0) { + kfd_release_live_view(); + return ret; + } + } + + sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr + + sub_type_hdr->length); + } + + sys_props.generation_count++; + topology_crat_parsed = 1; + + return 0; +} + + +#define sysfs_show_gen_prop(buffer, fmt, ...) \ + snprintf(buffer, PAGE_SIZE, "%s"fmt, buffer, __VA_ARGS__) +#define sysfs_show_32bit_prop(buffer, name, value) \ + sysfs_show_gen_prop(buffer, "%s %u\n", name, value) +#define sysfs_show_64bit_prop(buffer, name, value) \ + sysfs_show_gen_prop(buffer, "%s %llu\n", name, value) +#define sysfs_show_32bit_val(buffer, value) \ + sysfs_show_gen_prop(buffer, "%u\n", value) +#define sysfs_show_str_val(buffer, value) \ + sysfs_show_gen_prop(buffer, "%s\n", value) + +static ssize_t sysprops_show(struct kobject *kobj, struct attribute *attr, + char *buffer) +{ + ssize_t ret; + + /* Making sure that the buffer is an empty string */ + buffer[0] = 0; + + if (attr == &sys_props.attr_genid) { + ret = sysfs_show_32bit_val(buffer, sys_props.generation_count); + } else if (attr == &sys_props.attr_props) { + sysfs_show_64bit_prop(buffer, "platform_oem", + sys_props.platform_oem); + sysfs_show_64bit_prop(buffer, "platform_id", + sys_props.platform_id); + ret = sysfs_show_64bit_prop(buffer, "platform_rev", + sys_props.platform_rev); + } else { + ret = -EINVAL; + } + + return ret; +} + +static const struct sysfs_ops sysprops_ops = { + .show = sysprops_show, +}; + +static struct kobj_type sysprops_type = { + .sysfs_ops = &sysprops_ops, +}; + +static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, + char *buffer) +{ + ssize_t ret; + struct kfd_iolink_properties *iolink; + + /* Making sure that the buffer is an empty string */ + buffer[0] = 0; + + iolink = container_of(attr, struct kfd_iolink_properties, attr); + sysfs_show_32bit_prop(buffer, "type", iolink->iolink_type); + sysfs_show_32bit_prop(buffer, "version_major", iolink->ver_maj); + sysfs_show_32bit_prop(buffer, "version_minor", iolink->ver_min); + sysfs_show_32bit_prop(buffer, "node_from", iolink->node_from); + sysfs_show_32bit_prop(buffer, "node_to", iolink->node_to); + sysfs_show_32bit_prop(buffer, "weight", iolink->weight); + sysfs_show_32bit_prop(buffer, "min_latency", iolink->min_latency); + sysfs_show_32bit_prop(buffer, "max_latency", iolink->max_latency); + sysfs_show_32bit_prop(buffer, "min_bandwidth", iolink->min_bandwidth); + sysfs_show_32bit_prop(buffer, "max_bandwidth", iolink->max_bandwidth); + sysfs_show_32bit_prop(buffer, "recommended_transfer_size", + iolink->rec_transfer_size); + ret = sysfs_show_32bit_prop(buffer, "flags", iolink->flags); + + return ret; +} + +static const struct sysfs_ops iolink_ops = { + .show = iolink_show, +}; + +static struct kobj_type iolink_type = { + .sysfs_ops = &iolink_ops, +}; + +static ssize_t mem_show(struct kobject *kobj, struct attribute *attr, + char *buffer) +{ + ssize_t ret; + struct kfd_mem_properties *mem; + + /* Making sure that the buffer is an empty string */ + buffer[0] = 0; + + mem = container_of(attr, struct kfd_mem_properties, attr); + sysfs_show_32bit_prop(buffer, "heap_type", mem->heap_type); + sysfs_show_64bit_prop(buffer, "size_in_bytes", mem->size_in_bytes); + sysfs_show_32bit_prop(buffer, "flags", mem->flags); + sysfs_show_32bit_prop(buffer, "width", mem->width); + ret = sysfs_show_32bit_prop(buffer, "mem_clk_max", mem->mem_clk_max); + + return ret; +} + +static const struct sysfs_ops mem_ops = { + .show = mem_show, +}; + +static struct kobj_type mem_type = { + .sysfs_ops = &mem_ops, +}; + +static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr, + char *buffer) +{ + ssize_t ret; + uint32_t i; + struct kfd_cache_properties *cache; + + /* Making sure that the buffer is an empty string */ + buffer[0] = 0; + + cache = container_of(attr, struct kfd_cache_properties, attr); + sysfs_show_32bit_prop(buffer, "processor_id_low", + cache->processor_id_low); + sysfs_show_32bit_prop(buffer, "level", cache->cache_level); + sysfs_show_32bit_prop(buffer, "size", cache->cache_size); + sysfs_show_32bit_prop(buffer, "cache_line_size", cache->cacheline_size); + sysfs_show_32bit_prop(buffer, "cache_lines_per_tag", + cache->cachelines_per_tag); + sysfs_show_32bit_prop(buffer, "association", cache->cache_assoc); + sysfs_show_32bit_prop(buffer, "latency", cache->cache_latency); + sysfs_show_32bit_prop(buffer, "type", cache->cache_type); + snprintf(buffer, PAGE_SIZE, "%ssibling_map ", buffer); + for (i = 0; i < KFD_TOPOLOGY_CPU_SIBLINGS; i++) + ret = snprintf(buffer, PAGE_SIZE, "%s%d%s", + buffer, cache->sibling_map[i], + (i == KFD_TOPOLOGY_CPU_SIBLINGS-1) ? + "\n" : ","); + + return ret; +} + +static const struct sysfs_ops cache_ops = { + .show = kfd_cache_show, +}; + +static struct kobj_type cache_type = { + .sysfs_ops = &cache_ops, +}; + +static ssize_t node_show(struct kobject *kobj, struct attribute *attr, + char *buffer) +{ + ssize_t ret; + struct kfd_topology_device *dev; + char public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; + uint32_t i; + + /* Making sure that the buffer is an empty string */ + buffer[0] = 0; + + if (strcmp(attr->name, "gpu_id") == 0) { + dev = container_of(attr, struct kfd_topology_device, + attr_gpuid); + ret = sysfs_show_32bit_val(buffer, dev->gpu_id); + } else if (strcmp(attr->name, "name") == 0) { + dev = container_of(attr, struct kfd_topology_device, + attr_name); + for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE; i++) { + public_name[i] = + (char)dev->node_props.marketing_name[i]; + if (dev->node_props.marketing_name[i] == 0) + break; + } + public_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1] = 0x0; + ret = sysfs_show_str_val(buffer, public_name); + } else { + dev = container_of(attr, struct kfd_topology_device, + attr_props); + sysfs_show_32bit_prop(buffer, "cpu_cores_count", + dev->node_props.cpu_cores_count); + sysfs_show_32bit_prop(buffer, "simd_count", + dev->node_props.simd_count); + + if (dev->mem_bank_count < dev->node_props.mem_banks_count) { + pr_warn("kfd: mem_banks_count truncated from %d to %d\n", + dev->node_props.mem_banks_count, + dev->mem_bank_count); + sysfs_show_32bit_prop(buffer, "mem_banks_count", + dev->mem_bank_count); + } else { + sysfs_show_32bit_prop(buffer, "mem_banks_count", + dev->node_props.mem_banks_count); + } + + sysfs_show_32bit_prop(buffer, "caches_count", + dev->node_props.caches_count); + sysfs_show_32bit_prop(buffer, "io_links_count", + dev->node_props.io_links_count); + sysfs_show_32bit_prop(buffer, "cpu_core_id_base", + dev->node_props.cpu_core_id_base); + sysfs_show_32bit_prop(buffer, "simd_id_base", + dev->node_props.simd_id_base); + sysfs_show_32bit_prop(buffer, "capability", + dev->node_props.capability); + sysfs_show_32bit_prop(buffer, "max_waves_per_simd", + dev->node_props.max_waves_per_simd); + sysfs_show_32bit_prop(buffer, "lds_size_in_kb", + dev->node_props.lds_size_in_kb); + sysfs_show_32bit_prop(buffer, "gds_size_in_kb", + dev->node_props.gds_size_in_kb); + sysfs_show_32bit_prop(buffer, "wave_front_size", + dev->node_props.wave_front_size); + sysfs_show_32bit_prop(buffer, "array_count", + dev->node_props.array_count); + sysfs_show_32bit_prop(buffer, "simd_arrays_per_engine", + dev->node_props.simd_arrays_per_engine); + sysfs_show_32bit_prop(buffer, "cu_per_simd_array", + dev->node_props.cu_per_simd_array); + sysfs_show_32bit_prop(buffer, "simd_per_cu", + dev->node_props.simd_per_cu); + sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu", + dev->node_props.max_slots_scratch_cu); + sysfs_show_32bit_prop(buffer, "engine_id", + dev->node_props.engine_id); + sysfs_show_32bit_prop(buffer, "vendor_id", + dev->node_props.vendor_id); + sysfs_show_32bit_prop(buffer, "device_id", + dev->node_props.device_id); + sysfs_show_32bit_prop(buffer, "location_id", + dev->node_props.location_id); + + if (dev->gpu) { + sysfs_show_32bit_prop(buffer, "max_engine_clk_fcompute", + kfd2kgd->get_max_engine_clock_in_mhz( + dev->gpu->kgd)); + sysfs_show_64bit_prop(buffer, "local_mem_size", + kfd2kgd->get_vmem_size(dev->gpu->kgd)); + } + + ret = sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute", + cpufreq_quick_get_max(0)/1000); + } + + return ret; +} + +static const struct sysfs_ops node_ops = { + .show = node_show, +}; + +static struct kobj_type node_type = { + .sysfs_ops = &node_ops, +}; + +static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr) +{ + sysfs_remove_file(kobj, attr); + kobject_del(kobj); + kobject_put(kobj); +} + +static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) +{ + struct kfd_iolink_properties *iolink; + struct kfd_cache_properties *cache; + struct kfd_mem_properties *mem; + + BUG_ON(!dev); + + if (dev->kobj_iolink) { + list_for_each_entry(iolink, &dev->io_link_props, list) + if (iolink->kobj) { + kfd_remove_sysfs_file(iolink->kobj, + &iolink->attr); + iolink->kobj = NULL; + } + kobject_del(dev->kobj_iolink); + kobject_put(dev->kobj_iolink); + dev->kobj_iolink = NULL; + } + + if (dev->kobj_cache) { + list_for_each_entry(cache, &dev->cache_props, list) + if (cache->kobj) { + kfd_remove_sysfs_file(cache->kobj, + &cache->attr); + cache->kobj = NULL; + } + kobject_del(dev->kobj_cache); + kobject_put(dev->kobj_cache); + dev->kobj_cache = NULL; + } + + if (dev->kobj_mem) { + list_for_each_entry(mem, &dev->mem_props, list) + if (mem->kobj) { + kfd_remove_sysfs_file(mem->kobj, &mem->attr); + mem->kobj = NULL; + } + kobject_del(dev->kobj_mem); + kobject_put(dev->kobj_mem); + dev->kobj_mem = NULL; + } + + if (dev->kobj_node) { + sysfs_remove_file(dev->kobj_node, &dev->attr_gpuid); + sysfs_remove_file(dev->kobj_node, &dev->attr_name); + sysfs_remove_file(dev->kobj_node, &dev->attr_props); + kobject_del(dev->kobj_node); + kobject_put(dev->kobj_node); + dev->kobj_node = NULL; + } +} + +static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, + uint32_t id) +{ + struct kfd_iolink_properties *iolink; + struct kfd_cache_properties *cache; + struct kfd_mem_properties *mem; + int ret; + uint32_t i; + + BUG_ON(!dev); + + /* + * Creating the sysfs folders + */ + BUG_ON(dev->kobj_node); + dev->kobj_node = kfd_alloc_struct(dev->kobj_node); + if (!dev->kobj_node) + return -ENOMEM; + + ret = kobject_init_and_add(dev->kobj_node, &node_type, + sys_props.kobj_nodes, "%d", id); + if (ret < 0) + return ret; + + dev->kobj_mem = kobject_create_and_add("mem_banks", dev->kobj_node); + if (!dev->kobj_mem) + return -ENOMEM; + + dev->kobj_cache = kobject_create_and_add("caches", dev->kobj_node); + if (!dev->kobj_cache) + return -ENOMEM; + + dev->kobj_iolink = kobject_create_and_add("io_links", dev->kobj_node); + if (!dev->kobj_iolink) + return -ENOMEM; + + /* + * Creating sysfs files for node properties + */ + dev->attr_gpuid.name = "gpu_id"; + dev->attr_gpuid.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&dev->attr_gpuid); + dev->attr_name.name = "name"; + dev->attr_name.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&dev->attr_name); + dev->attr_props.name = "properties"; + dev->attr_props.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&dev->attr_props); + ret = sysfs_create_file(dev->kobj_node, &dev->attr_gpuid); + if (ret < 0) + return ret; + ret = sysfs_create_file(dev->kobj_node, &dev->attr_name); + if (ret < 0) + return ret; + ret = sysfs_create_file(dev->kobj_node, &dev->attr_props); + if (ret < 0) + return ret; + + i = 0; + list_for_each_entry(mem, &dev->mem_props, list) { + mem->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (!mem->kobj) + return -ENOMEM; + ret = kobject_init_and_add(mem->kobj, &mem_type, + dev->kobj_mem, "%d", i); + if (ret < 0) + return ret; + + mem->attr.name = "properties"; + mem->attr.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&mem->attr); + ret = sysfs_create_file(mem->kobj, &mem->attr); + if (ret < 0) + return ret; + i++; + } + + i = 0; + list_for_each_entry(cache, &dev->cache_props, list) { + cache->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (!cache->kobj) + return -ENOMEM; + ret = kobject_init_and_add(cache->kobj, &cache_type, + dev->kobj_cache, "%d", i); + if (ret < 0) + return ret; + + cache->attr.name = "properties"; + cache->attr.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&cache->attr); + ret = sysfs_create_file(cache->kobj, &cache->attr); + if (ret < 0) + return ret; + i++; + } + + i = 0; + list_for_each_entry(iolink, &dev->io_link_props, list) { + iolink->kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (!iolink->kobj) + return -ENOMEM; + ret = kobject_init_and_add(iolink->kobj, &iolink_type, + dev->kobj_iolink, "%d", i); + if (ret < 0) + return ret; + + iolink->attr.name = "properties"; + iolink->attr.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&iolink->attr); + ret = sysfs_create_file(iolink->kobj, &iolink->attr); + if (ret < 0) + return ret; + i++; +} + + return 0; +} + +static int kfd_build_sysfs_node_tree(void) +{ + struct kfd_topology_device *dev; + int ret; + uint32_t i = 0; + + list_for_each_entry(dev, &topology_device_list, list) { + ret = kfd_build_sysfs_node_entry(dev, 0); + if (ret < 0) + return ret; + i++; + } + + return 0; +} + +static void kfd_remove_sysfs_node_tree(void) +{ + struct kfd_topology_device *dev; + + list_for_each_entry(dev, &topology_device_list, list) + kfd_remove_sysfs_node_entry(dev); +} + +static int kfd_topology_update_sysfs(void) +{ + int ret; + + pr_info("Creating topology SYSFS entries\n"); + if (sys_props.kobj_topology == NULL) { + sys_props.kobj_topology = + kfd_alloc_struct(sys_props.kobj_topology); + if (!sys_props.kobj_topology) + return -ENOMEM; + + ret = kobject_init_and_add(sys_props.kobj_topology, + &sysprops_type, &kfd_device->kobj, + "topology"); + if (ret < 0) + return ret; + + sys_props.kobj_nodes = kobject_create_and_add("nodes", + sys_props.kobj_topology); + if (!sys_props.kobj_nodes) + return -ENOMEM; + + sys_props.attr_genid.name = "generation_id"; + sys_props.attr_genid.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&sys_props.attr_genid); + ret = sysfs_create_file(sys_props.kobj_topology, + &sys_props.attr_genid); + if (ret < 0) + return ret; + + sys_props.attr_props.name = "system_properties"; + sys_props.attr_props.mode = KFD_SYSFS_FILE_MODE; + sysfs_attr_init(&sys_props.attr_props); + ret = sysfs_create_file(sys_props.kobj_topology, + &sys_props.attr_props); + if (ret < 0) + return ret; + } + + kfd_remove_sysfs_node_tree(); + + return kfd_build_sysfs_node_tree(); +} + +static void kfd_topology_release_sysfs(void) +{ + kfd_remove_sysfs_node_tree(); + if (sys_props.kobj_topology) { + sysfs_remove_file(sys_props.kobj_topology, + &sys_props.attr_genid); + sysfs_remove_file(sys_props.kobj_topology, + &sys_props.attr_props); + if (sys_props.kobj_nodes) { + kobject_del(sys_props.kobj_nodes); + kobject_put(sys_props.kobj_nodes); + sys_props.kobj_nodes = NULL; + } + kobject_del(sys_props.kobj_topology); + kobject_put(sys_props.kobj_topology); + sys_props.kobj_topology = NULL; + } +} + +int kfd_topology_init(void) +{ + void *crat_image = NULL; + size_t image_size = 0; + int ret; + + /* + * Initialize the head for the topology device list + */ + INIT_LIST_HEAD(&topology_device_list); + init_rwsem(&topology_lock); + topology_crat_parsed = 0; + + memset(&sys_props, 0, sizeof(sys_props)); + + /* + * Get the CRAT image from the ACPI + */ + ret = kfd_topology_get_crat_acpi(crat_image, &image_size); + if (ret == 0 && image_size > 0) { + pr_info("Found CRAT image with size=%zd\n", image_size); + crat_image = kmalloc(image_size, GFP_KERNEL); + if (!crat_image) { + ret = -ENOMEM; + pr_err("No memory for allocating CRAT image\n"); + goto err; + } + ret = kfd_topology_get_crat_acpi(crat_image, &image_size); + + if (ret == 0) { + down_write(&topology_lock); + ret = kfd_parse_crat_table(crat_image); + if (ret == 0) + ret = kfd_topology_update_sysfs(); + up_write(&topology_lock); + } else { + pr_err("Couldn't get CRAT table size from ACPI\n"); + } + kfree(crat_image); + } else if (ret == -ENODATA) { + ret = 0; + } else { + pr_err("Couldn't get CRAT table size from ACPI\n"); + } + +err: + pr_info("Finished initializing topology ret=%d\n", ret); + return ret; +} + +void kfd_topology_shutdown(void) +{ + kfd_topology_release_sysfs(); + kfd_release_live_view(); +} + +static void kfd_debug_print_topology(void) +{ + struct kfd_topology_device *dev; + uint32_t i = 0; + + pr_info("DEBUG PRINT OF TOPOLOGY:"); + list_for_each_entry(dev, &topology_device_list, list) { + pr_info("Node: %d\n", i); + pr_info("\tGPU assigned: %s\n", (dev->gpu ? "yes" : "no")); + pr_info("\tCPU count: %d\n", dev->node_props.cpu_cores_count); + pr_info("\tSIMD count: %d", dev->node_props.simd_count); + i++; + } +} + +static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) +{ + uint32_t hashout; + uint32_t buf[7]; + int i; + + if (!gpu) + return 0; + + buf[0] = gpu->pdev->devfn; + buf[1] = gpu->pdev->subsystem_vendor; + buf[2] = gpu->pdev->subsystem_device; + buf[3] = gpu->pdev->device; + buf[4] = gpu->pdev->bus->number; + buf[5] = (uint32_t)(kfd2kgd->get_vmem_size(gpu->kgd) & 0xffffffff); + buf[6] = (uint32_t)(kfd2kgd->get_vmem_size(gpu->kgd) >> 32); + + for (i = 0, hashout = 0; i < 7; i++) + hashout ^= hash_32(buf[i], KFD_GPU_ID_HASH_WIDTH); + + return hashout; +} + +static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu) +{ + struct kfd_topology_device *dev; + struct kfd_topology_device *out_dev = NULL; + + BUG_ON(!gpu); + + list_for_each_entry(dev, &topology_device_list, list) + if (dev->gpu == NULL && dev->node_props.simd_count > 0) { + dev->gpu = gpu; + out_dev = dev; + break; + } + + return out_dev; +} + +static void kfd_notify_gpu_change(uint32_t gpu_id, int arrival) +{ + /* + * TODO: Generate an event for thunk about the arrival/removal + * of the GPU + */ +} + +int kfd_topology_add_device(struct kfd_dev *gpu) +{ + uint32_t gpu_id; + struct kfd_topology_device *dev; + int res; + + BUG_ON(!gpu); + + gpu_id = kfd_generate_gpu_id(gpu); + + pr_debug("kfd: Adding new GPU (ID: 0x%x) to topology\n", gpu_id); + + down_write(&topology_lock); + /* + * Try to assign the GPU to existing topology device (generated from + * CRAT table + */ + dev = kfd_assign_gpu(gpu); + if (!dev) { + pr_info("GPU was not found in the current topology. Extending.\n"); + kfd_debug_print_topology(); + dev = kfd_create_topology_device(); + if (!dev) { + res = -ENOMEM; + goto err; + } + dev->gpu = gpu; + + /* + * TODO: Make a call to retrieve topology information from the + * GPU vBIOS + */ + + /* + * Update the SYSFS tree, since we added another topology device + */ + if (kfd_topology_update_sysfs() < 0) + kfd_topology_release_sysfs(); + + } + + dev->gpu_id = gpu_id; + gpu->id = gpu_id; + dev->node_props.vendor_id = gpu->pdev->vendor; + dev->node_props.device_id = gpu->pdev->device; + dev->node_props.location_id = (gpu->pdev->bus->number << 24) + + (gpu->pdev->devfn & 0xffffff); + /* + * TODO: Retrieve max engine clock values from KGD + */ + + res = 0; + +err: + up_write(&topology_lock); + + if (res == 0) + kfd_notify_gpu_change(gpu_id, 1); + + return res; +} + +int kfd_topology_remove_device(struct kfd_dev *gpu) +{ + struct kfd_topology_device *dev; + uint32_t gpu_id; + int res = -ENODEV; + + BUG_ON(!gpu); + + down_write(&topology_lock); + + list_for_each_entry(dev, &topology_device_list, list) + if (dev->gpu == gpu) { + gpu_id = dev->gpu_id; + kfd_remove_sysfs_node_entry(dev); + kfd_release_topology_device(dev); + res = 0; + if (kfd_topology_update_sysfs() < 0) + kfd_topology_release_sysfs(); + break; + } + + up_write(&topology_lock); + + if (res == 0) + kfd_notify_gpu_change(gpu_id, 0); + + return res; +} + +/* + * When idx is out of bounds, the function will return NULL + */ +struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx) +{ + + struct kfd_topology_device *top_dev; + struct kfd_dev *device = NULL; + uint8_t device_idx = 0; + + down_read(&topology_lock); + + list_for_each_entry(top_dev, &topology_device_list, list) { + if (device_idx == idx) { + device = top_dev->gpu; + break; + } + + device_idx++; + } + + up_read(&topology_lock); + + return device; + +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h new file mode 100644 index 0000000000000000000000000000000000000000..989624b3cd14841422b004028df7cbda9ee5be86 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -0,0 +1,168 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef __KFD_TOPOLOGY_H__ +#define __KFD_TOPOLOGY_H__ + +#include +#include +#include "kfd_priv.h" + +#define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128 + +#define HSA_CAP_HOT_PLUGGABLE 0x00000001 +#define HSA_CAP_ATS_PRESENT 0x00000002 +#define HSA_CAP_SHARED_WITH_GRAPHICS 0x00000004 +#define HSA_CAP_QUEUE_SIZE_POW2 0x00000008 +#define HSA_CAP_QUEUE_SIZE_32BIT 0x00000010 +#define HSA_CAP_QUEUE_IDLE_EVENT 0x00000020 +#define HSA_CAP_VA_LIMIT 0x00000040 +#define HSA_CAP_WATCH_POINTS_SUPPORTED 0x00000080 +#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00 +#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8 +#define HSA_CAP_RESERVED 0xfffff000 + +struct kfd_node_properties { + uint32_t cpu_cores_count; + uint32_t simd_count; + uint32_t mem_banks_count; + uint32_t caches_count; + uint32_t io_links_count; + uint32_t cpu_core_id_base; + uint32_t simd_id_base; + uint32_t capability; + uint32_t max_waves_per_simd; + uint32_t lds_size_in_kb; + uint32_t gds_size_in_kb; + uint32_t wave_front_size; + uint32_t array_count; + uint32_t simd_arrays_per_engine; + uint32_t cu_per_simd_array; + uint32_t simd_per_cu; + uint32_t max_slots_scratch_cu; + uint32_t engine_id; + uint32_t vendor_id; + uint32_t device_id; + uint32_t location_id; + uint32_t max_engine_clk_fcompute; + uint32_t max_engine_clk_ccompute; + uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; +}; + +#define HSA_MEM_HEAP_TYPE_SYSTEM 0 +#define HSA_MEM_HEAP_TYPE_FB_PUBLIC 1 +#define HSA_MEM_HEAP_TYPE_FB_PRIVATE 2 +#define HSA_MEM_HEAP_TYPE_GPU_GDS 3 +#define HSA_MEM_HEAP_TYPE_GPU_LDS 4 +#define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5 + +#define HSA_MEM_FLAGS_HOT_PLUGGABLE 0x00000001 +#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002 +#define HSA_MEM_FLAGS_RESERVED 0xfffffffc + +struct kfd_mem_properties { + struct list_head list; + uint32_t heap_type; + uint64_t size_in_bytes; + uint32_t flags; + uint32_t width; + uint32_t mem_clk_max; + struct kobject *kobj; + struct attribute attr; +}; + +#define KFD_TOPOLOGY_CPU_SIBLINGS 256 + +#define HSA_CACHE_TYPE_DATA 0x00000001 +#define HSA_CACHE_TYPE_INSTRUCTION 0x00000002 +#define HSA_CACHE_TYPE_CPU 0x00000004 +#define HSA_CACHE_TYPE_HSACU 0x00000008 +#define HSA_CACHE_TYPE_RESERVED 0xfffffff0 + +struct kfd_cache_properties { + struct list_head list; + uint32_t processor_id_low; + uint32_t cache_level; + uint32_t cache_size; + uint32_t cacheline_size; + uint32_t cachelines_per_tag; + uint32_t cache_assoc; + uint32_t cache_latency; + uint32_t cache_type; + uint8_t sibling_map[KFD_TOPOLOGY_CPU_SIBLINGS]; + struct kobject *kobj; + struct attribute attr; +}; + +struct kfd_iolink_properties { + struct list_head list; + uint32_t iolink_type; + uint32_t ver_maj; + uint32_t ver_min; + uint32_t node_from; + uint32_t node_to; + uint32_t weight; + uint32_t min_latency; + uint32_t max_latency; + uint32_t min_bandwidth; + uint32_t max_bandwidth; + uint32_t rec_transfer_size; + uint32_t flags; + struct kobject *kobj; + struct attribute attr; +}; + +struct kfd_topology_device { + struct list_head list; + uint32_t gpu_id; + struct kfd_node_properties node_props; + uint32_t mem_bank_count; + struct list_head mem_props; + uint32_t cache_count; + struct list_head cache_props; + uint32_t io_link_count; + struct list_head io_link_props; + struct kfd_dev *gpu; + struct kobject *kobj_node; + struct kobject *kobj_mem; + struct kobject *kobj_cache; + struct kobject *kobj_iolink; + struct attribute attr_gpuid; + struct attribute attr_name; + struct attribute attr_props; +}; + +struct kfd_system_properties { + uint32_t num_devices; /* Number of H-NUMA nodes */ + uint32_t generation_count; + uint64_t platform_oem; + uint64_t platform_id; + uint64_t platform_rev; + struct kobject *kobj_topology; + struct kobject *kobj_nodes; + struct attribute attr_genid; + struct attribute attr_props; +}; + + + +#endif /* __KFD_TOPOLOGY_H__ */ diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..9c729dd8dd50a077d35f77ed3646438fb80d5c86 --- /dev/null +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -0,0 +1,185 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * This file defines the private interface between the + * AMD kernel graphics drivers and the AMD KFD. + */ + +#ifndef KGD_KFD_INTERFACE_H_INCLUDED +#define KGD_KFD_INTERFACE_H_INCLUDED + +#include + +struct pci_dev; + +#define KFD_INTERFACE_VERSION 1 + +struct kfd_dev; +struct kgd_dev; + +struct kgd_mem; + +enum kgd_memory_pool { + KGD_POOL_SYSTEM_CACHEABLE = 1, + KGD_POOL_SYSTEM_WRITECOMBINE = 2, + KGD_POOL_FRAMEBUFFER = 3, +}; + +struct kgd2kfd_shared_resources { + /* Bit n == 1 means VMID n is available for KFD. */ + unsigned int compute_vmid_bitmap; + + /* Compute pipes are counted starting from MEC0/pipe0 as 0. */ + unsigned int first_compute_pipe; + + /* Number of MEC pipes available for KFD. */ + unsigned int compute_pipe_count; + + /* Base address of doorbell aperture. */ + phys_addr_t doorbell_physical_address; + + /* Size in bytes of doorbell aperture. */ + size_t doorbell_aperture_size; + + /* Number of bytes at start of aperture reserved for KGD. */ + size_t doorbell_start_offset; +}; + +/** + * struct kgd2kfd_calls + * + * @exit: Notifies amdkfd that kgd module is unloaded + * + * @probe: Notifies amdkfd about a probe done on a device in the kgd driver. + * + * @device_init: Initialize the newly probed device (if it is a device that + * amdkfd supports) + * + * @device_exit: Notifies amdkfd about a removal of a kgd device + * + * @suspend: Notifies amdkfd about a suspend action done to a kgd device + * + * @resume: Notifies amdkfd about a resume action done to a kgd device + * + * This structure contains function callback pointers so the kgd driver + * will notify to the amdkfd about certain status changes. + * + */ +struct kgd2kfd_calls { + void (*exit)(void); + struct kfd_dev* (*probe)(struct kgd_dev *kgd, struct pci_dev *pdev); + bool (*device_init)(struct kfd_dev *kfd, + const struct kgd2kfd_shared_resources *gpu_resources); + void (*device_exit)(struct kfd_dev *kfd); + void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); + void (*suspend)(struct kfd_dev *kfd); + int (*resume)(struct kfd_dev *kfd); +}; + +/** + * struct kfd2kgd_calls + * + * @init_sa_manager: Initialize an instance of the sa manager, used by + * amdkfd for all system memory allocations that are mapped to the GART + * address space + * + * @fini_sa_manager: Releases all memory allocations for amdkfd that are + * handled by kgd sa manager + * + * @allocate_mem: Allocate a buffer from amdkfd's sa manager. The buffer can + * be used for mqds, hpds, kernel queue, fence and runlists + * + * @free_mem: Frees a buffer that was allocated by amdkfd's sa manager + * + * @get_vmem_size: Retrieves (physical) size of VRAM + * + * @get_gpu_clock_counter: Retrieves GPU clock counter + * + * @get_max_engine_clock_in_mhz: Retrieves maximum GPU clock in MHz + * + * @program_sh_mem_settings: A function that should initiate the memory + * properties such as main aperture memory type (cache / non cached) and + * secondary aperture base address, size and memory type. + * This function is used only for no cp scheduling mode. + * + * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp + * scheduling mode. Only used for no cp scheduling mode. + * + * @init_memory: Initializes memory apertures to fixed base/limit address + * and non cached memory types. + * + * @init_pipeline: Initialized the compute pipelines. + * + * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp + * sceduling mode. + * + * @hqd_is_occupies: Checks if a hqd slot is occupied. + * + * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot. + * + * This structure contains function pointers to services that the kgd driver + * provides to amdkfd driver. + * + */ +struct kfd2kgd_calls { + /* Memory management. */ + int (*init_sa_manager)(struct kgd_dev *kgd, unsigned int size); + void (*fini_sa_manager)(struct kgd_dev *kgd); + int (*allocate_mem)(struct kgd_dev *kgd, size_t size, size_t alignment, + enum kgd_memory_pool pool, struct kgd_mem **mem); + + void (*free_mem)(struct kgd_dev *kgd, struct kgd_mem *mem); + + uint64_t (*get_vmem_size)(struct kgd_dev *kgd); + uint64_t (*get_gpu_clock_counter)(struct kgd_dev *kgd); + + uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd); + + /* Register access functions */ + void (*program_sh_mem_settings)(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); + + int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); + + int (*init_memory)(struct kgd_dev *kgd); + int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); + + int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr); + + bool (*hqd_is_occupies)(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); + + int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id); +}; + +bool kgd2kfd_init(unsigned interface_version, + const struct kfd2kgd_calls *f2g, + const struct kgd2kfd_calls **g2f); + +#endif /* KGD_KFD_INTERFACE_H_INCLUDED */ diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c index e4a1490b42c280bbc49fd8cfcdc51a4c84b473fe..e3a7a5078e5ce1d5469fa740cc9bde74551ad090 100644 --- a/drivers/gpu/drm/armada/armada_crtc.c +++ b/drivers/gpu/drm/armada/armada_crtc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "armada_crtc.h" #include "armada_drm.h" #include "armada_fb.h" diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 9dc0fd5c1ea4ef7741eaad8e85a65ea280096caf..b7ee2634e47cb420cb476c0f2a5906681b826601 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "ast_drv.h" #include "ast_tables.h" diff --git a/drivers/gpu/drm/bochs/bochs_fbdev.c b/drivers/gpu/drm/bochs/bochs_fbdev.c index fe95d31cd1101118ab8be6174621fbd4692526f9..61dbf09dff5dca3549460039322406eb73fb7bf7 100644 --- a/drivers/gpu/drm/bochs/bochs_fbdev.c +++ b/drivers/gpu/drm/bochs/bochs_fbdev.c @@ -9,6 +9,17 @@ /* ---------------------------------------------------------------------- */ +static int bochsfb_mmap(struct fb_info *info, + struct vm_area_struct *vma) +{ + struct drm_fb_helper *fb_helper = info->par; + struct bochs_device *bochs = + container_of(fb_helper, struct bochs_device, fb.helper); + struct bochs_bo *bo = gem_to_bochs_bo(bochs->fb.gfb.obj); + + return ttm_fbdev_mmap(vma, &bo->bo); +} + static struct fb_ops bochsfb_ops = { .owner = THIS_MODULE, .fb_check_var = drm_fb_helper_check_var, @@ -19,6 +30,7 @@ static struct fb_ops bochsfb_ops = { .fb_pan_display = drm_fb_helper_pan_display, .fb_blank = drm_fb_helper_blank, .fb_setcmap = drm_fb_helper_setcmap, + .fb_mmap = bochsfb_mmap, }; static int bochsfb_create_object(struct bochs_device *bochs, @@ -123,11 +135,9 @@ static int bochsfb_create(struct drm_fb_helper *helper, info->screen_base = bo->kmap.virtual; info->screen_size = size; -#if 0 - /* FIXME: get this right for mmap(/dev/fb0) */ - info->fix.smem_start = bochs_bo_mmap_offset(bo); + drm_vma_offset_remove(&bo->bo.bdev->vma_manager, &bo->bo.vma_node); + info->fix.smem_start = 0; info->fix.smem_len = size; -#endif ret = fb_alloc_cmap(&info->cmap, 256, 0); if (ret) { diff --git a/drivers/gpu/drm/bochs/bochs_hw.c b/drivers/gpu/drm/bochs/bochs_hw.c index dbe619e6aab4e28e2c5d58b8ee87f1bb91b7eb5c..460389702d31cdca34a56b9e20c93f6ad4ad6f05 100644 --- a/drivers/gpu/drm/bochs/bochs_hw.c +++ b/drivers/gpu/drm/bochs/bochs_hw.c @@ -51,11 +51,10 @@ int bochs_hw_init(struct drm_device *dev, uint32_t flags) { struct bochs_device *bochs = dev->dev_private; struct pci_dev *pdev = dev->pdev; - unsigned long addr, size, mem, ioaddr, iosize; + unsigned long addr, size, mem, ioaddr, iosize, qext_size; u16 id; - if (/* (ent->driver_data == BOCHS_QEMU_STDVGA) && */ - (pdev->resource[2].flags & IORESOURCE_MEM)) { + if (pdev->resource[2].flags & IORESOURCE_MEM) { /* mmio bar with vga and bochs registers present */ if (pci_request_region(pdev, 2, "bochs-drm") != 0) { DRM_ERROR("Cannot request mmio region\n"); @@ -116,6 +115,24 @@ int bochs_hw_init(struct drm_device *dev, uint32_t flags) size / 1024, addr, bochs->ioports ? "ioports" : "mmio", ioaddr); + + if (bochs->mmio && pdev->revision >= 2) { + qext_size = readl(bochs->mmio + 0x600); + if (qext_size < 4 || qext_size > iosize) + goto noext; + DRM_DEBUG("Found qemu ext regs, size %ld\n", qext_size); + if (qext_size >= 8) { +#ifdef __BIG_ENDIAN + writel(0xbebebebe, bochs->mmio + 0x604); +#else + writel(0x1e1e1e1e, bochs->mmio + 0x604); +#endif + DRM_DEBUG(" qext endian: 0x%x\n", + readl(bochs->mmio + 0x604)); + } + } + +noext: return 0; } diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c index 6b7efcf363d61df33e493326b54daa8ce6f36825..85f0f8cf1fb82974f14f5491acaca9dae02cfeb0 100644 --- a/drivers/gpu/drm/bochs/bochs_kms.c +++ b/drivers/gpu/drm/bochs/bochs_kms.c @@ -6,6 +6,7 @@ */ #include "bochs.h" +#include static int defx = 1024; static int defy = 768; @@ -108,11 +109,32 @@ static void bochs_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, { } +static int bochs_crtc_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t page_flip_flags) +{ + struct bochs_device *bochs = + container_of(crtc, struct bochs_device, crtc); + struct drm_framebuffer *old_fb = crtc->primary->fb; + unsigned long irqflags; + + crtc->primary->fb = fb; + bochs_crtc_mode_set_base(crtc, 0, 0, old_fb); + if (event) { + spin_lock_irqsave(&bochs->dev->event_lock, irqflags); + drm_send_vblank_event(bochs->dev, -1, event); + spin_unlock_irqrestore(&bochs->dev->event_lock, irqflags); + } + return 0; +} + /* These provide the minimum set of functions required to handle a CRTC */ static const struct drm_crtc_funcs bochs_crtc_funcs = { .gamma_set = bochs_crtc_gamma_set, .set_config = drm_crtc_helper_set_config, .destroy = drm_crtc_cleanup, + .page_flip = bochs_crtc_page_flip, }; static const struct drm_crtc_helper_funcs bochs_helper_funcs = { diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.h b/drivers/gpu/drm/cirrus/cirrus_drv.h index d44e69daa23966c0e2f38332069a81cc24070d0a..693a4565c4ffb2a0629d48ec206ba3bb4accf830 100644 --- a/drivers/gpu/drm/cirrus/cirrus_drv.h +++ b/drivers/gpu/drm/cirrus/cirrus_drv.h @@ -210,6 +210,9 @@ int cirrus_framebuffer_init(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj); +bool cirrus_check_framebuffer(struct cirrus_device *cdev, int width, int height, + int bpp, int pitch); + /* cirrus_display.c */ int cirrus_modeset_init(struct cirrus_device *cdev); void cirrus_modeset_fini(struct cirrus_device *cdev); diff --git a/drivers/gpu/drm/cirrus/cirrus_fbdev.c b/drivers/gpu/drm/cirrus/cirrus_fbdev.c index d231b1c317afac94476e572b9560566311a38168..502a89eb54b51a7f146e9e61e54f03a0051c9b2d 100644 --- a/drivers/gpu/drm/cirrus/cirrus_fbdev.c +++ b/drivers/gpu/drm/cirrus/cirrus_fbdev.c @@ -139,6 +139,7 @@ static int cirrusfb_create_object(struct cirrus_fbdev *afbdev, struct drm_gem_object **gobj_p) { struct drm_device *dev = afbdev->helper.dev; + struct cirrus_device *cdev = dev->dev_private; u32 bpp, depth; u32 size; struct drm_gem_object *gobj; @@ -146,8 +147,10 @@ static int cirrusfb_create_object(struct cirrus_fbdev *afbdev, int ret = 0; drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp); - if (bpp > 24) + if (!cirrus_check_framebuffer(cdev, mode_cmd->width, mode_cmd->height, + bpp, mode_cmd->pitches[0])) return -EINVAL; + size = mode_cmd->pitches[0] * mode_cmd->height; ret = cirrus_gem_create(dev, size, true, &gobj); if (ret) diff --git a/drivers/gpu/drm/cirrus/cirrus_main.c b/drivers/gpu/drm/cirrus/cirrus_main.c index 99c1983f99d228b77ce9c498236ad68b348440e2..4c2d68e9102d6304b8fd5cc655266300706f32da 100644 --- a/drivers/gpu/drm/cirrus/cirrus_main.c +++ b/drivers/gpu/drm/cirrus/cirrus_main.c @@ -49,14 +49,16 @@ cirrus_user_framebuffer_create(struct drm_device *dev, struct drm_file *filp, struct drm_mode_fb_cmd2 *mode_cmd) { + struct cirrus_device *cdev = dev->dev_private; struct drm_gem_object *obj; struct cirrus_framebuffer *cirrus_fb; int ret; u32 bpp, depth; drm_fb_get_bpp_depth(mode_cmd->pixel_format, &depth, &bpp); - /* cirrus can't handle > 24bpp framebuffers at all */ - if (bpp > 24) + + if (!cirrus_check_framebuffer(cdev, mode_cmd->width, mode_cmd->height, + bpp, mode_cmd->pitches[0])) return ERR_PTR(-EINVAL); obj = drm_gem_object_lookup(dev, filp, mode_cmd->handles[0]); @@ -96,8 +98,7 @@ static int cirrus_vram_init(struct cirrus_device *cdev) { /* BAR 0 is VRAM */ cdev->mc.vram_base = pci_resource_start(cdev->dev->pdev, 0); - /* We have 4MB of VRAM */ - cdev->mc.vram_size = 4 * 1024 * 1024; + cdev->mc.vram_size = pci_resource_len(cdev->dev->pdev, 0); if (!request_mem_region(cdev->mc.vram_base, cdev->mc.vram_size, "cirrusdrmfb_vram")) { @@ -179,17 +180,22 @@ int cirrus_driver_load(struct drm_device *dev, unsigned long flags) } r = cirrus_mm_init(cdev); - if (r) + if (r) { dev_err(&dev->pdev->dev, "fatal err on mm init\n"); + goto out; + } r = cirrus_modeset_init(cdev); - if (r) + if (r) { dev_err(&dev->pdev->dev, "Fatal error during modeset init: %d\n", r); + goto out; + } dev->mode_config.funcs = (void *)&cirrus_mode_funcs; + + return 0; out: - if (r) - cirrus_driver_unload(dev); + cirrus_driver_unload(dev); return r; } @@ -307,3 +313,21 @@ cirrus_dumb_mmap_offset(struct drm_file *file, return ret; } + +bool cirrus_check_framebuffer(struct cirrus_device *cdev, int width, int height, + int bpp, int pitch) +{ + const int max_pitch = 0x1FF << 3; /* (4096 - 1) & ~111b bytes */ + const int max_size = cdev->mc.vram_size; + + if (bpp > 32) + return false; + + if (pitch > max_pitch) + return false; + + if (pitch * height > max_size) + return false; + + return true; +} diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c index c7c5a9d91fa0082b2967a55fb8614bcdb6f74a1d..99d4a74ffeaffd2582ca78353ae2156a90c796f5 100644 --- a/drivers/gpu/drm/cirrus/cirrus_mode.c +++ b/drivers/gpu/drm/cirrus/cirrus_mode.c @@ -16,6 +16,7 @@ */ #include #include +#include #include