1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * arch/parisc/lib/io.c
4  *
5  * Copyright (c) Matthew Wilcox 2001 for Hewlett-Packard
6  * Copyright (c) Randolph Chung 2001 <[email protected]>
7  *
8  * IO accessing functions which shouldn't be inlined because they're too big
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <asm/io.h>
14 
15 /*
16 ** Copies a block of memory from a device in an efficient manner.
17 ** Assumes the device can cope with 32-bit transfers.  If it can't,
18 ** don't use this function.
19 **
20 ** CR16 counts on C3000 reading 256 bytes from Symbios 896 RAM:
21 **	27341/64    = 427 cyc per int
22 **	61311/128   = 478 cyc per short
23 **	122637/256  = 479 cyc per byte
24 ** Ergo bus latencies dominant (not transfer size).
25 **      Minimize total number of transfers at cost of CPU cycles.
26 **	TODO: only look at src alignment and adjust the stores to dest.
27 */
memcpy_fromio(void * dst,const volatile void __iomem * src,int count)28 void memcpy_fromio(void *dst, const volatile void __iomem *src, int count)
29 {
30 	/* first compare alignment of src/dst */
31 	if ( (((unsigned long)dst ^ (unsigned long)src) & 1) || (count < 2) )
32 		goto bytecopy;
33 
34 	if ( (((unsigned long)dst ^ (unsigned long)src) & 2) || (count < 4) )
35 		goto shortcopy;
36 
37 	/* Then check for misaligned start address */
38 	if ((unsigned long)src & 1) {
39 		*(u8 *)dst = readb(src);
40 		src++;
41 		dst++;
42 		count--;
43 		if (count < 2) goto bytecopy;
44 	}
45 
46 	if ((unsigned long)src & 2) {
47 		*(u16 *)dst = __raw_readw(src);
48 		src += 2;
49 		dst += 2;
50 		count -= 2;
51 	}
52 
53 	while (count > 3) {
54 		*(u32 *)dst = __raw_readl(src);
55 		dst += 4;
56 		src += 4;
57 		count -= 4;
58 	}
59 
60  shortcopy:
61 	while (count > 1) {
62 		*(u16 *)dst = __raw_readw(src);
63 		src += 2;
64 		dst += 2;
65 		count -= 2;
66 	}
67 
68  bytecopy:
69 	while (count--) {
70 		*(char *)dst = readb(src);
71 		src++;
72 		dst++;
73 	}
74 }
75 
/*
 * Read COUNT 8-bit bytes from port PORT into memory starting at
 * DST.
 *
 * Every byte is fetched with its own inb().  Once the buffer pointer is
 * 32-bit aligned, four bytes at a time are packed into one word so that
 * memory is written with a single 32-bit store; leading and trailing
 * bytes are stored individually.  Returns immediately when COUNT is 0.
 */
void insb (unsigned long port, void *dst, unsigned long count)
{
	unsigned char *p;

	p = (unsigned char *)dst;

	/* Byte stores until the buffer pointer is 32-bit aligned. */
	while (((unsigned long)p) & 0x3) {
		if (!count)
			return;
		count--;
		*p = inb(port);
		p++;
	}

	while (count >= 4) {
		unsigned int w;

		count -= 4;
		/*
		 * The first byte read goes into the most significant byte of
		 * the word, i.e. the lowest address on a big-endian CPU.
		 * Shift in unsigned arithmetic: a byte >= 0x80 promoted to
		 * int and shifted by 24 would overflow a signed int.
		 */
		w = (unsigned int)inb(port) << 24;
		w |= (unsigned int)inb(port) << 16;
		w |= (unsigned int)inb(port) << 8;
		w |= inb(port);
		*(unsigned int *) p = w;
		p += 4;
	}

	/* Fewer than four bytes left: finish with byte stores. */
	while (count) {
		--count;
		*p = inb(port);
		p++;
	}
}
111 
112 
/*
 * Read COUNT 16-bit words from port PORT into memory starting at
 * DST.  DST may have any alignment: 32-bit and 16-bit aligned buffers
 * are filled with 32-bit stores where possible, odd-aligned buffers
 * with 16-bit stores.  This is used by the IDE driver to read disk
 * sectors.  Performance is important, but the interface seems to be
 * slow: just using the inlined version of the inw() breaks things.
 *
 * Each value returned by inw() is passed through cpu_to_le16() before
 * it is placed in memory.  Returns immediately when COUNT is 0.
 */
void insw (unsigned long port, void *dst, unsigned long count)
{
	unsigned int l = 0, l2;
	unsigned char *p;

	p = (unsigned char *)dst;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count>=2) {
			count -= 2;
			/*
			 * Pack two words into one 32-bit store.  The cast
			 * keeps the shift in unsigned arithmetic; a 16-bit
			 * value with its top bit set would otherwise be
			 * shifted into the sign bit of an int.
			 */
			l = (unsigned int)cpu_to_le16(inw(port)) << 16;
			l |= cpu_to_le16(inw(port));
			*(unsigned int *)p = l;
			p += 4;
		}
		if (count) {
			*(unsigned short *)p = cpu_to_le16(inw(port));
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		/* One 16-bit store brings p to a 32-bit boundary. */
		*(unsigned short *)p = cpu_to_le16(inw(port));
		p += 2;
		count--;
		while (count>=2) {
			count -= 2;
			l = (unsigned int)cpu_to_le16(inw(port)) << 16;
			l |= cpu_to_le16(inw(port));
			*(unsigned int *)p = l;
			p += 4;
		}
		if (count) {
			*(unsigned short *)p = cpu_to_le16(inw(port));
		}
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
	 case 0x03:
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */
		--count;

		/* The high byte of the first word is stored on its own ... */
		l = cpu_to_le16(inw(port));
		*p = l >> 8;
		p++;
		/*
		 * ... after which p is even: each 16-bit store combines the
		 * low byte of the previous word with the high byte of the
		 * next one.
		 */
		while (count--)
		{
			l2 = cpu_to_le16(inw(port));
			*(unsigned short *)p = (l & 0xff) << 8 | (l2 >> 8);
			p += 2;
			l = l2;
		}
		/* The low byte of the last word is still pending. */
		*p = l & 0xff;
		break;
	}
}
183 
184 
185 
/*
 * Read COUNT 32-bit words from port PORT into memory starting at
 * DST.  Works with any alignment of DST.  Performance is important,
 * but the interface seems to be slow: just using the inlined version
 * of the inl() breaks things.
 *
 * Each value returned by inl() is passed through cpu_to_le32().  For an
 * unaligned DST the first word is split so that the pointer reaches a
 * 32-bit boundary; after that every store merges the tail of the
 * previous word with the head of the next one, and the remaining tail
 * is written out at the end.  Returns immediately when COUNT is 0.
 */
void insl (unsigned long port, void *dst, unsigned long count)
{
	const unsigned long skew = ((unsigned long) dst) & 0x3;
	unsigned char *out = (unsigned char *)dst;
	unsigned int held, fresh;

	if (count == 0)
		return;

	if (skew == 0x00) {
		/* Buffer 32-bit aligned: one store per word read. */
		for (; count != 0; count--) {
			*(unsigned int *)out = cpu_to_le32(inl(port));
			out += 4;
		}
		return;
	}

	/* Every unaligned layout starts by fetching the first word. */
	count--;
	held = cpu_to_le32(inl(port));

	if (skew == 0x02) {
		/* Buffer 16-bit aligned: upper half first, lower half carried. */
		*(unsigned short *)out = held >> 16;
		out += 2;

		for (; count != 0; count--) {
			fresh = cpu_to_le32(inl(port));
			*(unsigned int *)out = (held & 0xffff) << 16 | (fresh >> 16);
			out += 4;
			held = fresh;
		}
		*(unsigned short *)out = held & 0xffff;
	} else if (skew == 0x01) {
		/* Buffer 8-bit aligned: byte + halfword reach the boundary. */
		*(unsigned char *)out = held >> 24;
		out++;
		*(unsigned short *)out = (held >> 8) & 0xffff;
		out += 2;

		for (; count != 0; count--) {
			fresh = cpu_to_le32(inl(port));
			*(unsigned int *)out = (held & 0xff) << 24 | (fresh >> 8);
			out += 4;
			held = fresh;
		}
		*out = held & 0xff;
	} else {
		/* skew == 0x03: a single byte reaches the boundary. */
		*out = held >> 24;
		out++;

		for (; count != 0; count--) {
			fresh = cpu_to_le32(inl(port));
			*(unsigned int *)out = (held & 0xffffff) << 8 | fresh >> 24;
			out += 4;
			held = fresh;
		}
		/* Three bytes of the last word remain: halfword, then byte. */
		*(unsigned short *)out = (held >> 8) & 0xffff;
		out += 2;
		*out = held & 0xff;
	}
}
264 
265 
/*
 * Counterpart of insb: write COUNT bytes, taken from memory starting
 * at SRC, to port PORT with one outb() per byte.
 * No attempt is made to do aligned memory transfers here:
 * doing byte reads the "slow" way isn't nearly as slow as
 * doing byte writes the slow way (no r-m-w cycle).
 */
void outsb(unsigned long port, const void * src, unsigned long count)
{
	const unsigned char *cursor = (const unsigned char *)src;

	for (; count != 0; count--) {
		outb(*cursor, port);
		cursor++;
	}
}
283 
/*
 * Like insw but in the opposite direction: write COUNT 16-bit words,
 * taken from memory starting at SRC, to port PORT.  SRC may have any
 * alignment.  This is used by the IDE driver to write disk sectors.
 * Performance is important, but the interface seems to be slow: just
 * using the inlined version of the outw() breaks things.
 *
 * Each value is passed through le16_to_cpu() before it is handed to
 * outw().  Returns immediately when COUNT is 0.
 */
void outsw (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count>=2) {
			count -= 2;
			/* One 32-bit load feeds two port writes. */
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		/* One 16-bit load brings p to a 32-bit boundary. */
		outw(le16_to_cpu(*(const unsigned short *)p), port);
		p += 2;
		count--;

		while (count>=2) {
			count -= 2;
			l = *(const unsigned int *)p;
			p += 4;
			outw(le16_to_cpu(l >> 16), port);
			outw(le16_to_cpu(l & 0xffff), port);
		}
		if (count) {
			outw(le16_to_cpu(*(const unsigned short *)p), port);
		}
		break;

	 case 0x01:			/* Buffer 8-bit aligned */
	 case 0x03:			/* (previously unhandled: wrote nothing) */
		/* I don't bother with 32bit transfers
		 * in this case, 16bit will have to do -- DE */

		/* The first byte becomes the high byte of the first word. */
		l  = *p << 8;
		p++;
		count--;
		while (count)
		{
			count--;
			/* p is even here, so 16-bit loads are aligned. */
			l2 = *(const unsigned short *)p;
			p += 2;
			outw(le16_to_cpu(l | l2 >> 8), port);
			/* Carry only the low byte over as the next high byte. */
			l = (l2 & 0xff) << 8;
		}
		/*
		 * The last byte is the low byte of the final word.  It is a
		 * plain 8-bit value, so it must be OR-ed in unshifted;
		 * shifting it right by 8 would always yield zero.
		 */
		l2 = *p;
		outw(le16_to_cpu(l | l2), port);
		break;

	}
}
354 
355 
/*
 * Like insl but in the opposite direction: write COUNT 32-bit words,
 * taken from memory starting at SRC, to port PORT.  This is used by
 * the IDE driver to write disk sectors.  Works with any alignment in
 * SRC.  Performance is important, but the interface seems to be slow:
 * just using the inlined version of the outl() breaks things.
 *
 * Each value is passed through le32_to_cpu() before it is handed to
 * outl().  For an unaligned SRC the leading bytes are loaded with
 * narrow accesses until the pointer is 32-bit aligned; every word
 * written afterwards is assembled from the tail of the previous load
 * and the head of the next one.  Returns immediately when COUNT is 0.
 */
void outsl (unsigned long port, const void *src, unsigned long count)
{
	unsigned int l = 0, l2;
	const unsigned char *p;

	p = (const unsigned char *)src;

	if (!count)
		return;

	switch (((unsigned long)p) & 0x3)
	{
	 case 0x00:			/* Buffer 32-bit aligned */
		while (count--)
		{
			outl(le32_to_cpu(*(const unsigned int *)p), port);
			p += 4;
		}
		break;

	 case 0x02:			/* Buffer 16-bit aligned */
		--count;

		/* Leading halfword: upper half of the first word. */
		l = *(const unsigned short *)p;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l << 16 | l2 >> 16), port);
			l = l2;
		}
		/* Trailing halfword: lower half of the last word. */
		l2 = *(const unsigned short *)p;
		outl (le32_to_cpu(l << 16 | l2), port);
		break;
	 case 0x01:			/* Buffer 8-bit aligned */
		--count;

		/*
		 * Byte + halfword give the top three bytes of the first word.
		 * The cast keeps the shift by 24 in unsigned arithmetic.
		 */
		l = (unsigned int)*p << 24;
		p++;
		l |= *(const unsigned short *)p << 8;
		p += 2;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 24), port);
			l = l2 << 8;
		}
		/* Trailing byte: lowest byte of the last word. */
		l2 = *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	 case 0x03:			/* Buffer 8-bit aligned */
		--count;

		/* A single byte gives the top byte of the first word. */
		l = (unsigned int)*p << 24;
		p++;

		while (count--)
		{
			l2 = *(const unsigned int *)p;
			p += 4;
			outl (le32_to_cpu(l | l2 >> 8), port);
			l = l2 << 24;
		}
		/*
		 * l already occupies bits 31..24, so the trailing halfword
		 * belongs in bits 23..8 and the final byte in bits 7..0.
		 * Shifting the halfword by 16 would collide with l and leave
		 * bits 15..8 empty.
		 */
		l2 = *(const unsigned short *)p << 8;
		p += 2;
		l2 |= *p;
		outl (le32_to_cpu(l | l2), port);
		break;
	}
}
436 
/* Exported string port I/O helpers, paired by transfer width. */
EXPORT_SYMBOL(insb);
EXPORT_SYMBOL(outsb);
EXPORT_SYMBOL(insw);
EXPORT_SYMBOL(outsw);
EXPORT_SYMBOL(insl);
EXPORT_SYMBOL(outsl);
443